### Background Processing for Forecast Lab

The purpose of this notebook is to run the time-consuming steps in the background so as not to disrupt the flow of the lab.

**Inputs needed:**

**Bucket Name** - A name for an S3 bucket generated by the CloudFormation template.

**Region** - Default is us-east-1, but you can change it here.

**Project** - Name of the background project, chosen so that it does not conflict with the main Lab exercises.

In [None]:
# --- Lab inputs: edit these before running the rest of the notebook ---

# Prefix for the names of the background resources created below.
project = "bp_forecastdemo_bg"

# Region used for the boto3 session.
region = 'us-east-1'

# S3 bucket that receives the training data.
bucket_name = 'forecastdemo-firstname-lastname'

In [None]:
import sys
import os
import json
import time
import pandas as pd
import boto3

# One session, pinned to the configured region, is shared by both clients.
session = boto3.Session(region_name=region)

# `forecast` is used below to create and describe resources; `forecastquery`
# is created alongside it from the same session.
forecast, forecastquery = (
    session.client(service_name=service) for service in ('forecast', 'forecastquery')
)

# Load the raw demand data. Column names are supplied explicitly, so the first
# line of the file is read as data; dtype=object keeps every column as a string.
df = pd.read_csv("item-demand-time.csv", dtype=object, names=['timestamp', 'value', 'item'])
df.head(3)

# Timestamps are strings, so the range filters below compare lexicographically
# (assumes ISO-like "YYYY-MM-DD ..." timestamps — TODO confirm against the file).
jan_to_oct = df[(df['timestamp'] >= '2014-01-01') & (df['timestamp'] <= '2014-10-31')]

# `df` is not modified above, so it is reused here rather than loading the
# same CSV a second time.
remaining_df = df[(df['timestamp'] >= '2014-10-31') & (df['timestamp'] <= '2014-12-01')]

# Write both splits without a header row or index column.
jan_to_oct.to_csv("item-demand-time-train.csv", header=False, index=False)
remaining_df.to_csv("item-demand-time-validation.csv", header=False, index=False)

# Object key under which the training split is stored in the bucket.
key = "elec_data/item-demand-time-train.csv"

# Upload the training CSV written earlier to that key.
s3_bucket = boto3.Session().resource('s3').Bucket(bucket_name)
s3_bucket.Object(key).upload_file("item-demand-time-train.csv")

# Values passed to create_dataset / create_dataset_import_job further down.
DATASET_FREQUENCY = "H"
TIMESTAMP_FORMAT = "yyyy-MM-dd hh:mm:ss"

# Resource names are derived from the project prefix; the S3 path points at
# the object uploaded above.
datasetName = "{}_ds".format(project)
datasetGroupName = "{}_dsg".format(project)
s3DataPath = "s3://{}/{}".format(bucket_name, key)

# Create the dataset group in the CUSTOM domain and keep its ARN for later calls.
dataset_group_request = {"DatasetGroupName": datasetGroupName, "Domain": "CUSTOM"}
create_dataset_group_response = forecast.create_dataset_group(**dataset_group_request)
datasetGroupArn = create_dataset_group_response['DatasetGroupArn']

# Describe the newly created group; the response is not stored.
forecast.describe_dataset_group(DatasetGroupArn=datasetGroupArn)

# Dataset schema: one (AttributeName, AttributeType) entry per column, in the
# order timestamp, target_value, item_id.
schema = {
    "Attributes": [
        {"AttributeName": attribute_name, "AttributeType": attribute_type}
        for attribute_name, attribute_type in (
            ("timestamp", "timestamp"),
            ("target_value", "float"),
            ("item_id", "string"),
        )
    ]
}

# Create the target time-series dataset using the schema and frequency defined above.
dataset_request = {
    "Domain": "CUSTOM",
    "DatasetType": 'TARGET_TIME_SERIES',
    "DatasetName": datasetName,
    "DataFrequency": DATASET_FREQUENCY,
    "Schema": schema,
}
response = forecast.create_dataset(**dataset_request)

datasetArn = response['DatasetArn']
forecast.describe_dataset(DatasetArn=datasetArn)

# Attach the new dataset to the dataset group.
forecast.update_dataset_group(
    DatasetGroupArn=datasetGroupArn,
    DatasetArns=[datasetArn],
)

iam = boto3.client("iam")

role_name = "ForecastRoleDemo"

# Trust statement allowing the forecast.amazonaws.com service principal to
# assume the role.
forecast_trust_statement = {
    "Effect": "Allow",
    "Principal": {"Service": "forecast.amazonaws.com"},
    "Action": "sts:AssumeRole",
}
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [forecast_trust_statement],
}

# Create the role, or fall back to looking up its ARN when it already exists.
try:
    create_role_response = iam.create_role(
        RoleName=role_name,
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_document),
    )
except iam.exceptions.EntityAlreadyExistsException:
    print("The role " + role_name + " exists, ignore to create it")
    role_arn = boto3.resource('iam').Role(role_name).arn
else:
    role_arn = create_role_response["Role"]["Arn"]

# Attach the Forecast and S3 managed policies, in that order.
policy_arn = "arn:aws:iam::aws:policy/AmazonForecastFullAccess"
for managed_policy_arn in (policy_arn, 'arn:aws:iam::aws:policy/AmazonS3FullAccess'):
    iam.attach_role_policy(RoleName=role_name, PolicyArn=managed_policy_arn)

# Wait a minute so the IAM role policy attachment can propagate.
time.sleep(60)

print(role_arn)

datasetImportJobName = 'EP_DSIMPORT_JOB_TARGET'

# The import reads the uploaded CSV from S3 using the role prepared above.
import_data_source = {
    "S3Config": {
        "Path": s3DataPath,
        "RoleArn": role_arn,
    }
}

ds_import_job_response = forecast.create_dataset_import_job(
    DatasetImportJobName=datasetImportJobName,
    DatasetArn=datasetArn,
    DataSource=import_data_source,
    TimestampFormat=TIMESTAMP_FORMAT,
)

ds_import_job_arn = ds_import_job_response['DatasetImportJobArn']
print(ds_import_job_arn)

# Poll the dataset import job once a minute until it reaches a terminal state
# or the 3-hour budget runs out.
status = None
max_time = time.time() + 3*60*60 # 3 hours

while time.time() < max_time:
    describe_dataset_import_job_response = forecast.describe_dataset_import_job(
        DatasetImportJobArn=ds_import_job_arn
    )

    status = describe_dataset_import_job_response["Status"]
    print("DatasetImportJob: {}".format(status))

    # The service reports failure as "CREATE_FAILED" (underscore). Comparing
    # against "CREATE FAILED" never matches, so a failed job would keep this
    # loop spinning for the whole 3 hours.
    if status in ("ACTIVE", "CREATE_FAILED"):
        break

    time.sleep(60)

# Final describe of the import job; the response is not stored.
forecast.describe_dataset_import_job(DatasetImportJobArn=ds_import_job_arn)

# Copy the values produced so far into "_bg"-suffixed names.
project_bg = project
datasetGroupArn_bg = datasetGroupArn
datasetArn_bg = datasetArn
role_name_bg = role_name
key_bg = key
bucket_name_bg = bucket_name
region_bg = region
ds_import_job_arn_bg = ds_import_job_arn

# Persist each "_bg" variable with the IPython %store magic
# (presumably so another notebook can restore them with `%store -r` — confirm).
%store project_bg
%store datasetGroupArn_bg
%store datasetArn_bg
%store role_name_bg
%store key_bg
%store bucket_name_bg
%store region_bg
%store ds_import_job_arn_bg

predictorName = "{}_deeparp_algo".format(project)
forecastHorizon = 24
algorithmArn = 'arn:aws:forecast:::algorithm/Deep_AR_Plus'

# Filling rules applied to the target_value attribute.
target_filling = {
    "FeaturizationMethodName": "filling",
    "FeaturizationMethodParameters": {
        "frontfill": "none",
        "middlefill": "zero",
        "backfill": "zero",
    },
}
featurization_config = {
    "ForecastFrequency": "H",
    "Featurizations": [
        {
            "AttributeName": "target_value",
            "FeaturizationPipeline": [target_filling],
        }
    ],
}
evaluation_parameters = {
    "NumberOfBacktestWindows": 1,
    "BackTestWindowOffset": 24,
}

# Train a predictor with the fixed algorithm above; AutoML and HPO are both off.
create_predictor_response = forecast.create_predictor(
    PredictorName=predictorName,
    AlgorithmArn=algorithmArn,
    ForecastHorizon=forecastHorizon,
    PerformAutoML=False,
    PerformHPO=False,
    EvaluationParameters=evaluation_parameters,
    InputDataConfig={"DatasetGroupArn": datasetGroupArn},
    FeaturizationConfig=featurization_config,
)

predictor_arn = create_predictor_response['PredictorArn']

# Poll the predictor once a minute until it reaches a terminal state or the
# 3-hour budget runs out. `timer` counts completed one-minute waits.
timer = 0
status = None
max_time = time.time() + 3*60*60 # 3 hours

while time.time() < max_time:
    describe_predictor_version_response = forecast.describe_predictor(PredictorArn=predictor_arn)
    status = describe_predictor_version_response["Status"]
    print("PredictorVersion: {}, time take: {} min".format(status, timer))

    # The service reports failure as "CREATE_FAILED" (underscore). Comparing
    # against "CREATE FAILED" never matches, so a failed training run would
    # keep this loop spinning for the whole 3 hours.
    if status in ("ACTIVE", "CREATE_FAILED"):
        break
    timer += 1
    time.sleep(60)

# Keep the predictor ARN under a "_bg" name and persist it with the IPython
# %store magic.
predictor_arn_bg = predictor_arn
%store predictor_arn_bg

# Request the predictor's accuracy metrics; the response is not stored.
forecast.get_accuracy_metrics(PredictorArn=predictor_arn)

forecastName = "{}_deeparp_algo_forecast".format(project)

# Create a forecast from the trained predictor and keep its ARN.
forecast_request = {"ForecastName": forecastName, "PredictorArn": predictor_arn}
create_forecast_response = forecast.create_forecast(**forecast_request)
forecast_arn = create_forecast_response['ForecastArn']

# Poll the forecast once a minute until it reaches a terminal state or the
# 3-hour budget runs out. `timer` counts completed one-minute waits.
timer = 0
status = None
max_time = time.time() + 3*60*60 # 3 hours

while time.time() < max_time:
    describe_forecast_response = forecast.describe_forecast(ForecastArn=forecast_arn)
    status = describe_forecast_response["Status"]
    print("Forecast: {}, time take: {} min".format(status, timer))

    # The service reports failure as "CREATE_FAILED" (underscore). Comparing
    # against "CREATE FAILED" never matches, so a failed forecast would keep
    # this loop spinning for the whole 3 hours.
    if status in ("ACTIVE", "CREATE_FAILED"):
        break

    timer += 1
    time.sleep(60)

# Keep the forecast ARN under a "_bg" name and persist it with the IPython
# %store magic.
forecast_arn_bg = forecast_arn
%store forecast_arn_bg