# IoT Equipment Failure Prediction using Sensor data
## 1 Environment Setup
### 1.1 Install packages and Import dependent libraries

In [1]:
!pip install websocket-client

# Import libraries
import pandas as pd
import numpy as np
import pdb
import json
import re
import requests
import sys
import types
import ibm_boto3
import websocket

Collecting websocket-client
  Downloading https://files.pythonhosted.org/packages/8a/a1/72ef9aa26cfe1a75cee09fc1957e4723add9de098c15719416a1ee89386b/websocket_client-0.48.0-py2.py3-none-any.whl (198kB)
[K    100% |████████████████████████████████| 204kB 3.8MB/s eta 0:00:01
[?25hRequirement not upgraded as not directly required: six in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from websocket-client)
Installing collected packages: websocket-client
Successfully installed websocket-client-0.48.0


In [2]:
# Import libraries
from io import StringIO
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from botocore.client import Config



## 2 Create IoT Predictive Analytics Functions

In [3]:
# Function to extract Column names of dataset
def dataset_columns(dataset):
    return list(dataset.columns.values)

# Function to train Logistic regression model
def train_logistic_regression(x_vals, y_vals):
    logistic_regression_model = LogisticRegression()
    logistic_regression_model.fit(x_vals, y_vals)
    return logistic_regression_model

# Function to return Predicted values
def score_data(trained_model, x_vals):
    ypredict = trained_model.predict(x_vals)
    return ypredict

# Function to calculate Prediction accuracy of model
def model_accuracy(trained_model, variables, targets):
    accuracy_score = trained_model.score(variables, targets)
    return accuracy_score

# Function to generate Confusion matrix
def confusion_matrix(actfail, predictfail):
  # Compute Confusion matrix
  print("Actual, Predicted Observations:  ",len(actfail), len(predictfail))
  # print(actfail, predictfail)
  anpn = 0
  anpy = 0
  aypn = 0
  aypy = 0
  
  for i in range(len(actfail)):
      if (actfail[i]==0 and predictfail[i]==0):
          anpn = anpn + 1
      elif (actfail[i]==0 and predictfail[i]==1):
          anpy = anpy + 1
      elif (actfail[i]==1 and predictfail[i]==0):
          aypn = aypn + 1
      else:
          aypy = aypy + 1
  # Confusoin matrix
  print ("--------------------------------------------")
  print ("Confusion Matrix")
  print ("--------------------------------------------")
  print ("              ", "Predicted N", "Predicted Y")
  print ("Actual N      ", anpn,"          ", anpy) 
  print ("Actual Y      ", aypn,"          ", aypy)
  print ("--------------------------------------------")
  print ("Total observations  :  ", anpn+anpy+aypn+aypy)
  print ("False Positives     :  ", anpy)
  print ("False Negatives     :  ", aypn)
  print ("Overall Accuracy    :  ", round((float(anpn+aypy)/float(anpn+anpy+aypn+aypy))*100, 2), "%")
  print ("Sensitivity/Recall  :  ", round((float(aypy)/float(aypn+aypy))*100, 2), "%")
  print ("Specificity         :  ", round((float(anpn)/float(anpn+anpy))*100, 2), "%")
  print ("Precision           :  ", round((float(aypy)/float(anpy+aypy))*100, 2), "%")
  print ("--------------------------------------------")


## 3. Read Configuration parametric values

In [4]:
# Function to Read json parametric values
def f_getconfigval(injsonstr, invarname):
    # paramname, paramvalue
    # Unpack the json parameter values
    # This section requires regex
    for i in range(len(injsonstr)):
        pair = injsonstr[i]
        # Return parametric value
        if pair['paramname'] == invarname:
            return(pair['paramvalue'])

In [5]:
# Configuration parameters

d = [{'paramvalue': "['ATEMP', 'PID', 'OUTPRESSURE', 'INPRESSURE', 'TEMP']", 'paramname': 'features'}, {'paramvalue': 'FAIL', 'paramname': 'target'}, {'paramvalue': '0.7', 'paramname': 'data_size'}]
print(d)


[{'paramname': 'features', 'paramvalue': "['ATEMP', 'PID', 'OUTPRESSURE', 'INPRESSURE', 'TEMP']"}, {'paramname': 'target', 'paramvalue': 'FAIL'}, {'paramname': 'data_size', 'paramvalue': '0.7'}]


In [6]:
# Read JSON configuration parametric values
# Unpack the json parameter values
# This section uses regex
v_feature_list = eval("list("+ f_getconfigval(d, "features") +")")
v_target = str(f_getconfigval(d, "target"))
v_train_datasize = float(f_getconfigval(d, "data_size"))


In [7]:
# Verify configuration parametric values
# print (feature_list, target, train_datasize)
print (v_feature_list, v_target, v_train_datasize)

['ATEMP', 'PID', 'OUTPRESSURE', 'INPRESSURE', 'TEMP'] FAIL 0.7


## 4 Read IoT Sensor data from database

In [8]:
# Read data from DB2 warehouse in BMX
# -----------------------------------
from ibmdbpy import IdaDataBase, IdaDataFrame

# Call function to read data for specific sensor
# @hidden_cell
# The section below needs to be modified:
#    Insert your credentials to read data from your data sources and replace 
#    the idaConnect() section below
# This connection object is used to access your data and contains your credentials.
idadb_d281f6cd34eb4bc98f0183a45598dbb9 = IdaDataBase(dsn='DASHDB;Database=BLUDB;Hostname=<HOST_NAME>;Port=50000;PROTOCOL=TCPIP;UID=<UID>;PWD=<PASSWORD>')

df_iotdata = IdaDataFrame(idadb_d281f6cd34eb4bc98f0183a45598dbb9, '<SCHEMA>.EQUIPMENT_DATA').as_dataframe()

# Check Number of observations read for analysis
print ("Number of Observations :", len(df_iotdata))
# Inspect a few observations
df_iotdata.head()


Number of Observations : 540


Unnamed: 0,FOOTFALL,ATEMP,SELFLR,CLINLR,DOLELR,PID,OUTPRESSURE,INPRESSURE,TEMP,FAIL
0,0,7,7,1,6,6,36,3,1,1
1,190,1,3,3,5,1,20,4,1,0
2,110,3,3,4,6,1,21,4,1,0
3,100,7,5,6,4,1,77,4,1,0
4,31,1,5,4,5,4,21,4,1,0


In [9]:
# Print dataset column names
datacolumns = dataset_columns(df_iotdata)
print ("Data set columns : ", list(datacolumns))

Data set columns :  ['FOOTFALL', 'ATEMP', 'SELFLR', 'CLINLR', 'DOLELR', 'PID', 'OUTPRESSURE', 'INPRESSURE', 'TEMP', 'FAIL']


## 5 Run Failure Prediction algorithm on IoT data
### 5.1 Split data into Training and Test data

In [10]:
# Split Training and Testing data
train_x, test_x, train_y, test_y = train_test_split(df_iotdata[v_feature_list], df_iotdata[v_target], train_size=v_train_datasize)
print ("Train x counts : ", len(train_x), len(train_x.columns.values))
print ("Train y counts : ", len(train_y))
 
print ("Test x counts : ", len(test_x), len(test_x.columns.values))
print ("Test y counts : ", len(test_y))


Train x counts :  378 5
Train y counts :  378
Test x counts :  162 5
Test y counts :  162


### 5.2 Train the Predictive model

In [11]:
# Training Logistic regression model
trained_logistic_regression_model = train_logistic_regression(train_x, train_y)

train_accuracy = model_accuracy(trained_logistic_regression_model, train_x, train_y)

# Testing the logistic regression model
test_accuracy = model_accuracy(trained_logistic_regression_model, test_x, test_y)

print ("Training Accuracy : ", round(train_accuracy * 100, 2), "%")



Training Accuracy :  92.06 %


### 5.3 Score the Test data using the Trained model

In [12]:
# Model accuracy: Score and construct Confusion matrix for Test data
actfail = test_y.values
predictfail = score_data(trained_logistic_regression_model, test_x)

## 6 Confusion matrix for deeper analysis of Prediction accuracy
#####   Confusion matrix outputs below can be used for calculating more customised Accuracy metrics

In [13]:
# Print Count of Actual fails, Predicted fails
# Print Confusion matrix
confusion_matrix(actfail, predictfail)

Actual, Predicted Observations:   162 162
--------------------------------------------
Confusion Matrix
--------------------------------------------
               Predicted N Predicted Y
Actual N       100            8
Actual Y       5            49
--------------------------------------------
Total observations  :   162
False Positives     :   8
False Negatives     :   5
Overall Accuracy    :   91.98 %
Sensitivity/Recall  :   90.74 %
Specificity         :   92.59 %
Precision           :   85.96 %
--------------------------------------------
