In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import swat.cas.datamsghandlers as dmh
from swat import *
import dlpy
from dlpy import Sequential
from dlpy.layers import * 
from dlpy.model import Optimizer, AdamSolver, Sequence
from dlpy.timeseries import plot_timeseries
from dlpy.metrics import accuracy_score, roc_auc_score, confusion_matrix
from getpass import getpass
%matplotlib inline

# Parameters

In [8]:
# period:  near-failure horizon -- rows whose time-to-failure ('ttf') is
#          <= period are labelled nearfailure = 1.
# seq_len: subsequence length handed to prepare_subsequences().
period, seq_len = 30, 50

# Python - Prepare Train Data

In [None]:
# Read the space-separated training file, then discard the columns at
# positions 26 and 27 (presumably empty columns produced by trailing
# separators -- TODO confirm against the raw file).
train = pd.read_csv('/sasdata/pdm/PM_train.txt', sep=' ')
train = train.drop(columns=train.columns[[26, 27]])
train.shape

In [None]:
# Time-to-failure: the last cycle observed for each 'id' minus the current
# cycle.  The aggregation is named by the string 'max' instead of passing the
# Python builtin `max`; recent pandas versions warn when a builtin callable is
# handed to GroupBy.transform.
train['ttf'] = train.groupby(['id'])['cycle'].transform('max') - train['cycle']
train.shape

In [None]:
# Binary target: 1 when the unit is within `period` cycles of its last
# observed cycle, else 0.  A vectorized comparison replaces the row-wise
# apply(lambda ...); the result is the same 0/1 integer column.
train['nearfailure'] = (train['ttf'] <= period).astype('int64')
# 'age' is a plain copy of the cycle counter.
train['age'] = train['cycle']
train.shape

# Python - Prepare Test Data

In [2]:
# Read the space-separated test file and discard the columns at positions 26
# and 27 (presumably empty columns produced by trailing separators -- TODO
# confirm against the raw file).
test = pd.read_csv('/sasdata/pdm/PM_test.txt', sep=' ')
test = test.drop(columns=test.columns[[26, 27]])
test.shape

(13096, 26)

In [3]:
# Read the truth file, keep only its first column (named 'more' here) and
# attach an 'id' equal to the 1-based row position.
more = pd.read_csv('/sasdata/pdm/PM_truth.txt', sep=' ')
more = more.drop(columns=more.columns[1])
more.columns = ['more']
more['id'] = more.index + 1
more.shape

(100, 2)

In [4]:
# Last observed cycle per 'id' in the test data, as a two-column frame
# ('id', 'max').
rul = test.groupby('id')['cycle'].max().rename('max').reset_index()
rul.shape

(100, 2)

In [5]:
# 'rtf' = value from the truth file plus the last cycle observed for the same
# unit.  The last-cycle value is looked up by 'id' rather than relying on
# `more` and `rul` happening to share the same row order, so a missing or
# re-ordered id cannot silently pair the wrong rows.
last_cycle_by_id = rul.set_index('id')['max']
more['rtf'] = more['more'] + more['id'].map(last_cycle_by_id)
more.shape

(100, 3)

In [6]:
# Attach each unit's 'rtf' to its test rows, turn it into a per-row
# time-to-failure ('ttf' = rtf - cycle), then discard the helper column.
more = more.drop(columns='more')
test = test.merge(more, on=['id'], how='left')
test['ttf'] = test['rtf'] - test['cycle']
test = test.drop(columns='rtf')
test.shape

(13096, 27)

In [9]:
# Binary target for the test rows: 1 when 'ttf' is within `period`, else 0.
# A vectorized comparison replaces the row-wise apply(lambda ...); the result
# is the same 0/1 integer column.
test['nearfailure'] = (test['ttf'] <= period).astype('int64')
# 'age' is a plain copy of the cycle counter.
test['age'] = test['cycle']
test.shape

(13096, 29)

In [19]:
for x in ['ttf', 'nearfailure', 'age']: test.drop(x, axis=1, inplace=True)

In [22]:
test.to_csv('/sasdata/pdm/test_stream.csv', index=False)

# Connect to SAS Viya

In [None]:
# Open the CAS connection used by every later cell.  The password is read
# interactively with getpass so it is never stored in the notebook.
sas = CAS('localhost', 5570, 'sasdemo', getpass('Please enter your password:'))

# Load Data

In [None]:
# Build a dlpy TimeseriesTable from the pandas `train` frame on the `sas` connection.
ts_train = dlpy.TimeseriesTable.from_pandas(sas, train)

In [None]:
# Display the shape of the freshly created training table.
ts_train.shape

# Preprocess Data

In [None]:
# Every column except the identifiers and 'ttf' is treated as a time series.
# NOTE(review): 'nearfailure' (the target used below) and 'age' are not
# excluded here, so they remain in `series` -- confirm that is intended.
non_series_columns = ['id', 'cycle', 'ttf']
series = ts_train.columns.drop(non_series_columns).values.tolist()

In [None]:
# Format the training table with 'cycle' as the time id and `series` as the
# time-series columns.
ts_train.timeseries_formatting(timeseries=series, timeid='cycle')

In [None]:
# Accumulate the training series per 'id' group at the 'obs' interval.
# NOTE: the spelling `timeseries_accumlation` is the method name as called
# throughout this notebook; check the dlpy API before "correcting" it.
ts_train.timeseries_accumlation(groupby='id', acc_interval='obs')

In [None]:
# Prepare subsequences of length `seq_len`, with 'nearfailure' as the target
# and the columns in `series` as predictors.
ts_train.prepare_subsequences(target='nearfailure', predictor_timeseries=series, seq_len=seq_len)

In [None]:
# Display the table shape again, now that subsequences have been prepared.
ts_train.shape

# Model 1

In [None]:
# Model 1: an input layer, two stacked LSTM layers (100 then 50 units, sigmoid
# activation, dropout 0.1) and a single sigmoid output unit with entropy error.
model = Sequential(sas, model_table='lstm_rnn')

lstm_sigmoid_layers = [
    InputLayer(std='STD'),
    Recurrent(rnn_type='LSTM', output_type='samelength', n=100, dropout=0.1, act='sigmoid'),
    Recurrent(rnn_type='LSTM', output_type='encoding', n=50, dropout=0.1, act='sigmoid'),
    OutputLayer(n=1, full_connect=True, act='sigmoid', error='entropy'),
]
for layer in lstm_sigmoid_layers:
    model.add(layer)

model.plot_network()

In [None]:
# Adam-based optimizer for Model 1: 100 epochs, mini-batches of 200, fixed seed.
adam_solver = AdamSolver()
optimizer = Optimizer(algorithm=adam_solver, max_epochs=100, mini_batch_size=200, seed=1234)

In [None]:
# Sequence specification built from the options exposed by ts_train.sequence_opt.
seq_spec  = Sequence(**ts_train.sequence_opt)

In [None]:
# Train Model 1 on the prepared training table.  Input and target settings
# come from ts_train.inputs_target; weights go to the 'rnn_weights' CAS table
# (replaced if it already exists).
rnn_weights = sas.CASTable('rnn_weights', replace=True)
result = model.train(
    ts_train,
    optimizer=optimizer,
    sequence=seq_spec,
    model='lstm_rnn',
    model_weights=rnn_weights,
    **ts_train.inputs_target
)

In [None]:
# Plot the training history of Model 1 with a tick frequency of 1.
model.plot_training_history(tick_frequency=1)

In [None]:
# Display the value returned by model.train() for Model 1.
result

# Model 2

In [None]:
# Name used for Model 2's model table, for its train/score calls and, with
# '_weights' appended, for its weights table.
name = 'lstm_002'

In [None]:
# Model 2: same topology as the first model, but the two LSTM layers use tanh
# activations.  Note that this rebinds `model`.
model = Sequential(sas, model_table=name)

lstm_tanh_layers = [
    InputLayer(std='STD'),
    Recurrent(rnn_type='LSTM', output_type='samelength', n=100, dropout=0.1, act='tanh'),
    Recurrent(rnn_type='LSTM', output_type='encoding', n=50, dropout=0.1, act='tanh'),
    OutputLayer(n=1, full_connect=True, act='sigmoid', error='entropy'),
]
for layer in lstm_tanh_layers:
    model.add(layer)

model.plot_network()

In [None]:
# Adam-based optimizer for Model 2, with the same settings as for Model 1:
# 100 epochs, mini-batches of 200, fixed seed.
adam_solver = AdamSolver()
optimizer = Optimizer(algorithm=adam_solver, max_epochs=100, mini_batch_size=200, seed=1234)

In [None]:
# Sequence specification for Model 2, rebuilt from ts_train.sequence_opt.
seq_spec  = Sequence(**ts_train.sequence_opt)

In [None]:
# Train Model 2.  Input and target settings come from ts_train.inputs_target;
# the weights table is named after the model with a '_weights' suffix.
weights_table_name = name + '_weights'
result = model.train(
    ts_train,
    optimizer=optimizer,
    sequence=seq_spec,
    model=name,
    model_weights=weights_table_name,
    **ts_train.inputs_target
)

In [None]:
# Plot fit error and loss from the optimizer iteration history returned by
# model.train() (a hand-built alternative to model.plot_training_history).
fig, ax = plt.subplots(figsize=(12, 5))
result.OptIterHistory[['FitError', 'Loss']].plot(ax=ax)
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
ax.legend(['Fit Error', 'Fit Loss'], loc='upper right', prop={'size': 14})
ax.set_xlabel('Iterations', fontsize=16)
ax.set_ylabel('Error / Loss', fontsize=16)
plt.show()

In [None]:
# Display the value returned by model.train() for Model 2.
result

# Evaluate

In [None]:
# Build a dlpy TimeseriesTable from the pandas `test` frame on the `sas` connection.
# The preprocessing below reads the columns listed in `series` and the
# 'nearfailure' target from this table.
ts_test = dlpy.TimeseriesTable.from_pandas(sas, test)

In [None]:
# Display the shape of the freshly created test table.
ts_test.shape

In [None]:
# Format the test table exactly like the training table: 'cycle' as the time
# id and `series` as the time-series columns.
ts_test.timeseries_formatting(timeseries=series, timeid='cycle')

In [None]:
# Accumulate the test series per 'id' group at the 'obs' interval (same
# settings as for the training table).
ts_test.timeseries_accumlation(groupby='id', acc_interval='obs')

In [None]:
# Prepare test subsequences of length `seq_len`, with 'nearfailure' as the
# target and the columns in `series` as predictors.
ts_test.prepare_subsequences(target='nearfailure', predictor_timeseries=series, seq_len=seq_len)

In [None]:
# Display the test table shape again, now that subsequences have been prepared.
ts_test.shape

In [None]:
# Score the prepared test table with Model 2 and its trained weights.  'id',
# 'cycle' and 'nearfailure' are copied through to the 'lstm_pred' output table,
# which is replaced if it already exists.
scored_output = dict(name='lstm_pred', replace=True)
model.score(
    ts_test,
    model=name,
    init_weights=name + '_weights',
    copy_vars=['id', 'cycle', 'nearfailure'],
    casout=scored_output,
)

In [None]:
# Client-side handle to the 'lstm_pred' CAS table named as casout in the scoring call.
pred = sas.CASTable('lstm_pred')

In [None]:
# Load the fedSql action set; the next cell calls sas.fedsql.execdirect.
sas.loadactionset('fedSql')

In [None]:
# Rebuild 'lstm_pred' with an extra 0/1 column 'nf_pred' obtained by cutting
# the predicted value _DL_Pred_ at 0.5.
# NOTE(review): the statement reads from and replaces the same table, so
# running this cell twice would select an already existing nf_pred column.
threshold_query = (
    'create table lstm_pred {options replace=true} as select '
    '   *, '
    '   case when _DL_Pred_ < 0.5 then 0 else 1 end as nf_pred '
    'from lstm_pred'
)
sas.fedsql.execdirect(threshold_query)

In [None]:
# Compare the true labels held in ts_test with the predictions held in
# lstm_pred.  The two columns live in different CAS tables, so the metric
# functions have to match rows through id_vars.  'id' alone identifies an
# engine, not a row (each engine has many cycles); the pair ('id', 'cycle')
# is used so that every true label is matched with its own prediction.
row_key = ['id', 'cycle']
acc = accuracy_score(ts_test.nearfailure, pred.nf_pred, id_vars=row_key)
auc = roc_auc_score(ts_test.nearfailure, pred._DL_Pred_, id_vars=row_key, pos_label=1)
cmx = confusion_matrix(ts_test.nearfailure, pred.nf_pred, id_vars=row_key, labels=[0,1])

In [None]:
# Report accuracy and AUC to four decimals, followed by the confusion matrix.
print(
    'Accuracy Score: %.4f' % acc,
    'AUC Score: %.4f' % auc,
    '\nConfusion Matrix:\n%s' % cmx,
    sep='\n',
)

# Save Model

In [None]:
# Save the current `model` (Model 2 at this point, since `model` was rebound
# above) with save_to_table, passing '/sasdata/pdm' as the destination.
model.save_to_table('/sasdata/pdm')