## References

1. https://github.com/awslabs/predictive-maintenance-using-machine-learning/blob/master/source/notebooks/sagemaker_predictive_maintenance.ipynb
2. https://dylan-notebook.notebook.us-east-1.sagemaker.aws/notebooks/xgboost_abalone_2020-01-29/xgboost_abalone.ipynb
3. https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html
4. https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost_hyperparameters.html

In [1]:
import sagemaker
from sagemaker import get_execution_role
import boto3

# Resolve the execution role and open the SageMaker session used by later cells.
role = get_execution_role()
sess = sagemaker.Session()

# This is the role that SageMaker would use to leverage AWS resources
# (S3, CloudWatch) on your behalf.
print(role)

arn:aws:iam::023375022819:role/service-role/AmazonSageMaker-ExecutionRole-20191220T213935


In [2]:
import numpy as np
import pandas as pd
import os
import json

In [3]:
# List the files available in the local cmapss_dataset/ folder.
!ls cmapss_dataset/

Damage Propagation Modeling.pdf  RUL_FD004.txt	 train_FD001.txt
readme.txt			 test_FD001.txt  train_FD002.txt
RUL_FD001.txt			 test_FD002.txt  train_FD003.txt
RUL_FD002.txt			 test_FD003.txt  train_FD004.txt
RUL_FD003.txt			 test_FD004.txt


In [4]:
# Folder that holds the raw train/test/RUL text files.
data_folder = 'cmapss_dataset'

# Names for the 26 data columns: id, cycle, setting1..setting3 and s1..s21.
columns = (
    ['id', 'cycle']
    + ['setting{}'.format(n) for n in range(1, 4)]
    + ['s{}'.format(n) for n in range(1, 22)]
)

## training data

In [5]:
# Load the four training sets and min-max scale every column after id/cycle.
# eps is added to numerator and denominator, which keeps the denominator
# non-zero when a column is constant.
eps = 0.000001
train_df = []
for i in range(1, 5):
    train_path = '{}/train_FD{:03d}.txt'.format(data_folder, i)
    df = pd.read_csv(train_path, delimiter=' ', header=None)
    # Columns at positions 26 and 27 are discarded, leaving the 26 named columns.
    df = df.drop(columns=df.columns[[26, 27]])
    df.columns = columns

    # Scale each setting/sensor column using this data set's own min and max.
    scaled_cols = columns[2:]
    col_min = df[scaled_cols].min()
    col_max = df[scaled_cols].max()
    df[scaled_cols] = (df[scaled_cols] - col_min + eps) / (col_max - col_min + eps)
    train_df.append(df)

In [6]:
# Preview the frame left in `df` by the loading loop (the last file read, train_FD004).
df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21
0,1,1,0.999926,0.997625,1.0,1.357405e-08,0.130347,0.272082,0.212586,9.337067e-08,...,0.003593,0.993111,0.550773,0.40054,0.0001,0.28866,0.627907,1.0,0.015473,0.015881
1,1,2,0.476147,0.831592,1.0,0.6269852,0.647971,0.634407,0.511781,0.5079366,...,0.450364,0.992395,0.481761,0.351346,0.0001,0.608247,0.8646934,1.0,0.477968,0.481487
2,1,3,0.9999,0.998694,1.0,1.357405e-08,0.123646,0.271245,0.222481,9.337067e-08,...,0.003202,0.993056,0.531031,0.423788,0.0001,0.278351,0.627907,1.0,0.010764,0.018932
3,1,4,0.99981,0.997625,1.0,1.357405e-08,0.121351,0.266168,0.224835,9.337067e-08,...,0.003641,0.993194,0.553707,0.401716,0.0001,0.268041,0.627907,1.0,0.012782,0.018724
4,1,5,0.595275,0.737174,2.5e-08,0.2380888,0.005691,0.033916,0.022025,0.293184,...,0.087492,0.001405,0.04814,0.920536,0.0001,0.030928,2.114165e-09,6.6357e-08,0.130172,0.14556


In [7]:
# (rows, columns) of the last training frame loaded.
df.shape

(61249, 26)

In [8]:
# (rows, columns) of the first training frame (train_FD001).
train_df[0].shape

(20631, 26)

In [9]:
# Add the RUL (remaining useful life) column to every training frame:
# RUL = (last cycle recorded for the row's id) - (row's cycle).
for i, frame in enumerate(train_df):
    # Per-id maximum cycle, broadcast back onto each row of that id.
    last_cycle = frame.groupby('id')['cycle'].transform('max')
    # `df` is rebound on purpose: the following cell previews it.
    df = frame.assign(RUL=last_cycle - frame['cycle'])
    train_df[i] = df

In [10]:
# Preview the last training frame, now carrying the RUL column.
df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s13,s14,s15,s16,s17,s18,s19,s20,s21,RUL
0,1,1,0.999926,0.997625,1.0,1.357405e-08,0.130347,0.272082,0.212586,9.337067e-08,...,0.993111,0.550773,0.40054,0.0001,0.28866,0.627907,1.0,0.015473,0.015881,320
1,1,2,0.476147,0.831592,1.0,0.6269852,0.647971,0.634407,0.511781,0.5079366,...,0.992395,0.481761,0.351346,0.0001,0.608247,0.8646934,1.0,0.477968,0.481487,319
2,1,3,0.9999,0.998694,1.0,1.357405e-08,0.123646,0.271245,0.222481,9.337067e-08,...,0.993056,0.531031,0.423788,0.0001,0.278351,0.627907,1.0,0.010764,0.018932,318
3,1,4,0.99981,0.997625,1.0,1.357405e-08,0.121351,0.266168,0.224835,9.337067e-08,...,0.993194,0.553707,0.401716,0.0001,0.268041,0.627907,1.0,0.012782,0.018724,317
4,1,5,0.595275,0.737174,2.5e-08,0.2380888,0.005691,0.033916,0.022025,0.293184,...,0.001405,0.04814,0.920536,0.0001,0.030928,2.114165e-09,6.6357e-08,0.130172,0.14556,316


## test data

In [11]:
# Build one frame per test set: load the time series, attach the RUL value
# supplied for each id, derive a per-row RUL, then min-max scale the
# setting/sensor columns. Relies on `data_folder`, `columns` and `eps`
# defined in earlier cells.
test_df = []
for i in range(1,5):
    # Load time series
    df = pd.read_csv('{}/test_FD{:03d}.txt'.format(data_folder, i), delimiter=' ', header=None)
    # Discard the columns at positions 26 and 27, keeping the 26 data columns.
    df.drop(df.columns[[26, 27]], axis=1, inplace=True)
    
    # Load the RUL values
    df_rul = pd.read_csv('{}/RUL_FD{:03d}.txt'.format(data_folder, i), delimiter=' ', header=None)    
    # Keep only the first column of the RUL file.
    df_rul.drop(df_rul.columns[1], axis=1, inplace=True)
    # Shift the index to start at 1; it is matched against column 0 (the id) below.
    # Presumably ids are numbered from 1 in file order — verify against the data set readme.
    df_rul.index += 1
    
    # Merge RUL and timeseries and compute RUL per timestamp
    df = df.merge(df_rul, left_on=df.columns[0], right_index=True, how='left')
    # Columns are renamed by position; the merged-in RUL value becomes 'RUL_end'.
    df.columns = columns + ['RUL_end']
    rul = pd.DataFrame(df.groupby('id')['cycle'].max()).reset_index()
    rul.columns = ['id', 'max']
    df = df.merge(rul, on=['id'], how='left') # We get the number of cycles per series
    df['RUL'] = df['max'] + df['RUL_end'] - df['cycle'] # The RUL is the number of cycles per series + RUL - how many cycles have already ran
    df.drop(['max','RUL_end'], axis=1, inplace=True)
    
    # Normalize
    # NOTE(review): scaling uses this test set's own min/max rather than the
    # statistics of the matching training set, so train and test features are
    # not on exactly the same scale — confirm this is intended.
    df[columns[2:]]=(df[columns[2:]]-df[columns[2:]].min()+eps)/(df[columns[2:]].max()-df[columns[2:]].min()+eps)
    test_df.append(df)

## upload processed data to S3 for training

In [12]:
# List the working directory (the upload cell below writes into processed_data/).
!ls

cmapss_dataset	CMAPSSData.zip	pred_maintenance.ipynb	processed_data


In [26]:
# Publish the raw files, the processed test sets and an 80/20 train/validation
# split of each training set to the session's default S3 bucket.
import boto3
import os

bucket = sess.default_bucket()
prefix = 'md-predictive-maintenance'
split_seed = 42  # fixed so the train/validation split is reproducible across runs

s3_bucket_resource = boto3.resource('s3').Bucket(bucket)

# Create the local staging folder if it is missing so the cell runs on a fresh checkout.
os.makedirs('processed_data', exist_ok=True)

# Upload raw data files to S3
for subdir, dirs, files in os.walk(data_folder):
    for file in files:
        full_path = os.path.join(subdir, file)
        s3_path = os.path.join(prefix, full_path)
        s3_bucket_resource.Object(s3_path).upload_file(full_path)

# Upload processed test data for inference.
# These files keep the header row, the index column and the RUL column.
# NOTE(review): they likely need to be reduced to bare feature rows before
# being sent to an XGBoost endpoint — confirm against the inference code.
for i in range(len(test_df)):
    local_test_file = 'processed_data/test-{}.csv'.format(i)
    test_df[i].to_csv(local_test_file)
    s3_test_file = os.path.join(prefix, 'test', 'test-{}.csv'.format(i))
    s3_bucket_resource.Object(s3_test_file).upload_file(local_test_file)

# Upload processed data for training
for i in range(len(train_df)):
    df_temp = train_df[i]

    # The built-in XGBoost algorithm reads CSV with the label in the FIRST
    # column and no header row, so move RUL to the front and write the files
    # without header or index.
    feature_cols = [c for c in df_temp.columns if c != 'RUL']
    df_temp = df_temp[['RUL'] + feature_cols]

    # Shuffle once, then cut 80% / 20% so the two sets are disjoint.
    shuffled = df_temp.sample(frac=1, random_state=split_seed)
    cut = int(.8 * len(shuffled))
    train, validate = shuffled.iloc[:cut], shuffled.iloc[cut:]

    local_train_file = 'processed_data/train-{}.csv'.format(i)
    train.to_csv(local_train_file, header=False, index=False)
    s3_train_file = os.path.join(prefix, 'train', 'train-{}.csv'.format(i))
    s3_bucket_resource.Object(s3_train_file).upload_file(local_train_file)

    # Write the held-out rows only (not the whole frame) as validation data.
    local_validation_file = 'processed_data/validation-{}.csv'.format(i)
    validate.to_csv(local_validation_file, header=False, index=False)
    s3_validation_file = os.path.join(prefix, 'validation', 'validation-{}.csv'.format(i))
    s3_bucket_resource.Object(s3_validation_file).upload_file(local_validation_file)

s3_train_data = 's3://{}/{}/{}'.format(bucket, prefix, 'train')
print('uploaded training data location: {}'.format(s3_train_data))

uploaded training data location: s3://sagemaker-us-east-1-023375022819/md-predictive-maintenance/train


## set model output location

In [27]:
# S3 location under the project prefix where training artifacts are expected.
# NOTE(review): the estimator cell rebuilds this same string instead of reusing
# `output_location`.
output_location = 's3://{}/{}/output'.format(bucket, prefix)
print('training artifacts will be uploaded to: {}'.format(output_location))

training artifacts will be uploaded to: s3://sagemaker-us-east-1-023375022819/md-predictive-maintenance/output


## XGBoost Estimator

In [28]:
# Region of the default boto3 session; used below to pick the algorithm image.
region = boto3.Session().region_name

# HTTPS form of the bucket address (printed for reference only).
bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region,bucket)

print(region, bucket_path, sep='\n')

us-east-1
https://s3-us-east-1.amazonaws.com/sagemaker-us-east-1-023375022819


In [29]:
# Look up the built-in XGBoost container image (version 0.90-1) for this region.
from sagemaker.amazon.amazon_estimator import get_image_uri
container = get_image_uri(region, 'xgboost', '0.90-1')

In [33]:
# S3 prefixes holding the CSV files uploaded for training and validation.
train_uri = 's3://{}/{}/train'.format(bucket, prefix)
validation_uri = 's3://{}/{}/validation/'.format(bucket, prefix)

# Input channel descriptions handed to the estimator's fit() call.
s3_input_train = sagemaker.s3_input(content_type='csv', s3_data=train_uri)
s3_input_validation = sagemaker.s3_input(content_type='csv', s3_data=validation_uri)

In [34]:
# Configure and launch the XGBoost training job on a single ml.m4.xlarge instance.
sess = sagemaker.Session()

# Regression on RUL with squared-error loss, 100 boosting rounds.
hyperparameters = {
    'objective': 'reg:squarederror',
    'num_round': 100,
    'max_depth': 5,
    'eta': 0.2,
    'gamma': 4,
    'min_child_weight': 6,
    'subsample': 0.8,
    'silent': 0,
}

xgb = sagemaker.estimator.Estimator(
    container,
    role,
    sagemaker_session=sess,
    output_path='s3://{}/{}/output'.format(bucket, prefix),
    train_instance_type='ml.m4.xlarge',
    train_instance_count=1,
)
xgb.set_hyperparameters(**hyperparameters)

# Channel names map to the S3 inputs defined in the previous cell.
channels = {'train': s3_input_train, 'validation': s3_input_validation}
xgb.fit(channels)

2020-01-30 05:04:58 Starting - Starting the training job...
2020-01-30 05:04:59 Starting - Launching requested ML instances......
2020-01-30 05:06:04 Starting - Preparing the instances for training...
2020-01-30 05:06:57 Downloading - Downloading input data...
2020-01-30 05:07:29 Training - Downloading the training image...
2020-01-30 05:07:51 Training - Training image download completed. Training in progress..[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ',

Training seconds: 82
Billable seconds: 82


## Hosting, option 1: deploy with the SageMaker Python SDK

In [35]:
# Deploy the trained estimator to a real-time endpoint on one ml.m4.xlarge
# instance; the returned predictor is kept in `xgb_predictor`.
xgb_predictor = xgb.deploy(initial_instance_count=1,
                           instance_type='ml.m4.xlarge')

-------------------!

In [36]:
# Clean up: delete the endpoint created by xgb.deploy() above.
xgb.delete_endpoint()

## Hosting, option 2: create the model, endpoint config and endpoint with boto3

In [40]:
%%time
import boto3
from time import gmtime, strftime

model_name = 'sagemaker-xgboost-2020-01-30-05-04-57-984-model'
print(model_name)

model_data = '''https://sagemaker-us-east-1-023375022819.s3.amazonaws.com/md-predictive-maintenance/output/sagemaker-xgboost-2020-01-30-05-04-57-984/output/model.tar.gz'''

primary_container = {
    'Image': container,
    'ModelDataUrl': model_data
}

create_model_response = client.create_model(
    ModelName = model_name,
    ExecutionRoleArn = role,
    PrimaryContainer = primary_container)

print(create_model_response['ModelArn'])

sagemaker-xgboost-2020-01-30-05-04-57-984-model
arn:aws:sagemaker:us-east-1:023375022819:model/sagemaker-xgboost-2020-01-30-05-04-57-984-model
CPU times: user 14.2 ms, sys: 0 ns, total: 14.2 ms
Wall time: 408 ms


In [41]:
from time import gmtime, strftime

# The endpoint configuration name carries a UTC timestamp suffix.
endpoint_config_name = 'md-XGBoostEndpointConfig-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_config_name)

# Single production variant: one ml.m4.xlarge instance serving `model_name`.
production_variant = {
    'VariantName': 'AllTraffic',
    'ModelName': model_name,
    'InstanceType': 'ml.m4.xlarge',
    'InitialInstanceCount': 1,
    'InitialVariantWeight': 1,
}

create_endpoint_config_response = client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant])

print("Endpoint Config Arn: " + create_endpoint_config_response['EndpointConfigArn'])

md-XGBoostEndpointConfig-2020-01-30-05-25-34
Endpoint Config Arn: arn:aws:sagemaker:us-east-1:023375022819:endpoint-config/md-xgboostendpointconfig-2020-01-30-05-25-34


In [42]:
%%time
import time

endpoint_name = 'md-XGBoostEndpoint-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_name)
create_endpoint_response = client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name)
print(create_endpoint_response['EndpointArn'])

resp = client.describe_endpoint(EndpointName=endpoint_name)
status = resp['EndpointStatus']
print("Status: " + status)

while status=='Creating':
    time.sleep(60)
    resp = client.describe_endpoint(EndpointName=endpoint_name)
    status = resp['EndpointStatus']
    print("Status: " + status)

print("Arn: " + resp['EndpointArn'])
print("Status: " + status)

md-XGBoostEndpoint-2020-01-30-05-25-58
arn:aws:sagemaker:us-east-1:023375022819:endpoint/md-xgboostendpoint-2020-01-30-05-25-58
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: InService
Arn: arn:aws:sagemaker:us-east-1:023375022819:endpoint/md-xgboostendpoint-2020-01-30-05-25-58
Status: InService
CPU times: user 161 ms, sys: 17.1 ms, total: 179 ms
Wall time: 10min 1s


In [45]:
# Delete the endpoint created above through the boto3 client.
# Only the endpoint is removed here; the endpoint configuration and model
# created in the earlier cells are not deleted by this call.
client.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': '2696e338-3324-4a96-820c-0279bb5d1bfa',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '2696e338-3324-4a96-820c-0279bb5d1bfa',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Thu, 30 Jan 2020 05:38:58 GMT'},
  'RetryAttempts': 0}}