In [None]:

import pandas as pd
import os
import os.path
import matplotlib.pyplot as plt

# Read rlog1.csv from the current working directory.
# 'NA' and '?' are parsed as missing values in addition to pandas' defaults.
csv_name = "rlog1.csv"
full_file_path = os.path.join(os.getcwd(), csv_name)
df = pd.read_csv(
    full_file_path,
    encoding="ISO-8859-1",
    na_values=['NA', '?'],
)

# Keep the rendered preview small: at most 11 columns and 5 rows.
for option_name, limit in (('display.max_columns', 11), ('display.max_rows', 5)):
    pd.set_option(option_name, limit)

print(df.shape)
display(df)

In [None]:
def formatString(instr, cnt, delim):
    """Left-align ``instr`` in a fixed-width field and append ``delim``.

    The value is converted with ``str()``, padded on the right with spaces
    and cut to the field width, so text longer than the field is truncated
    on the right.

    Args:
        instr: value to render; anything ``str()`` accepts.
        cnt: requested field width in characters; widths above 32 are
            reduced to 32.
        delim: suffix appended after the field, converted with ``str()``.

    Returns:
        The padded or truncated field followed by ``delim``.
    """
    width = min(cnt, 32)
    padded = str(instr) + " " * 36
    return padded[:width] + str(delim)
def rformatString(instr, cnt, delim):
    """Right-align ``instr`` in a fixed-width field and append ``delim``.

    The value is converted with ``str()`` and padded on the left with
    spaces. Text longer than the field keeps its right-most characters.

    Args:
        instr: value to render; anything ``str()`` accepts.
        cnt: requested field width in characters. Widths above 32 are
            reduced to 32; zero or negative widths produce an empty field.
        delim: suffix appended after the field, converted with ``str()``.

    Returns:
        The padded or truncated field followed by ``delim``.
    """
    width = max(0, min(cnt, 32))
    if width == 0:
        # A plain ``s[-0:]`` slice would return the whole string, not an
        # empty field, so the zero-width case is handled explicitly.
        return str(delim)
    return str(instr)[-width:].rjust(width) + str(delim)

In [None]:
# Test for Missing Values
def strToInt(str):
    """Convert a value to ``int``, returning 0 when it cannot be converted.

    Args:
        str: value to convert. The name shadows the built-in ``str`` inside
            this function; it is kept so existing keyword callers still work.

    Returns:
        ``int(str)`` on success, otherwise 0.
    """
    try:
        return int(str)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN; TypeError: None and other
        # unsupported types; OverflowError: infinite floats.
        return 0

def myfunc(x):
    """Return ``x.sum()`` coerced to an integer via ``strToInt``.

    Args:
        x: any object exposing a ``sum()`` method.

    Returns:
        Whatever ``strToInt`` returns for the computed total.
    """
    total = x.sum()
    return strToInt(total)

# Missing-value report: one line per column with the number of missing
# entries and their share of all records.
totrecs = len(df)
print("Missing Values")
print("     Total records: ", totrecs)

print(rformatString("Cnt",4,""),formatString("Field Name", 24," "), rformatString("Missing",7," ") ,rformatString("Pct Missing",12, ""))
for cnt, c in enumerate(df.columns, start=1):
    # Count the True flags directly. Passing a Series-level function to
    # Series.agg relies on pandas first trying it element-wise and falling
    # back only after that attempt raises.
    sn = df[c].isna().sum()
    # sn is 0 whenever totrecs is 0, so the division is never reached for an
    # empty frame. The percentage is always rendered with two decimals.
    p = format((sn / totrecs) * 100 if sn != 0 else 0.0, ".2f")
    print(rformatString(cnt, 4,""),formatString(c, 24, " "), rformatString(sn, 7, " ") ,rformatString(p, 10, " %"))

In [None]:
# Drop every column whose missing-value count equals the number of records,
# then list the remaining columns with their unique and missing counts.
print("Count Unique Indexes and Drop missing columns: ", df.shape)
droplist = [c for c in df.columns if df[c].isna().sum() == df.shape[0]]
print("Dropped Null Columns ",droplist)
print(rformatString("Cnt", 4,""),formatString("Index", 24,"") ,rformatString("Unique", 8,""),rformatString("isNull", 8," "), formatString("Class", 24,""))
# Rebind instead of dropping in place so the statement produces a new frame.
df = df.drop(columns=droplist)
for cnt, c in enumerate(df.columns):
    s = df[c].nunique()
    n = df[c].isna().sum()
    # The "Class" column shows the column's dtype. type(c) would describe the
    # column label itself, not the data stored in the column.
    print(rformatString(cnt, 4,""),formatString(c, 24," ") ,rformatString(s, 7," "),rformatString(n, 7," "), formatString(df[c].dtype, 24,""))


In [None]:
# Normalise every column label to lower case and list the result.
print("Filtered column indexes to lower: ")
df.columns = list(map(str.lower, df.columns))
for column_name in df.columns:
    print(column_name)

In [None]:
# Count categorical data input.
# This data needs some explanation:
#   000 no sensors hit
#   001 sensor hit on the right
#   100 sensor hit on the left
#   010 sensor hit in the middle
#   110 sensor hit on left and middle
#   011 sensor hit on right and middle
#   101 sensor hit on left and right
#   111 all sensors hit
#   TOF same as CLIFF; this sensor detects if the ground is present.
#         It is an edge detection to prevent the robot from going down stairs.
#   CTG Clear To Go; this means the robot has not hit a sensor
#         for quite some time, which causes the robot to turn randomly.
#
scolumn = "org"                 # org - original hit
nunique = df[scolumn].nunique()
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
vcounts = df[scolumn].value_counts()
print("Unique " + scolumn + " : ", nunique)
print("Code  Count")
pd.set_option('display.max_rows', 25)
print(vcounts)

In [None]:

# When the robot starts, the running averages are still 0.
# Those zeros are placeholders, so they are filled with the mean of the
# non-zero readings. Including the zeros in the mean would pull the fill
# value toward 0 and make the printed means change when the cell is re-run.
lmean = df.loc[df['lftavg'] != 0, 'lftavg'].mean()
rmean = df.loc[df['rhtavg'] != 0, 'rhtavg'].mean()
# A column with no non-zero readings has a NaN mean; leave its zeros alone
# rather than turning them into NaN.
if pd.notna(lmean):
    df['lftavg'] = df['lftavg'].replace(0, lmean)
if pd.notna(rmean):
    df['rhtavg'] = df['rhtavg'].replace(0, rmean)
print(lmean,rmean)

In [None]:
# Create dummies for the categorical input 'org' (original hit).
# The dtype is pinned to uint8: pandas >= 2.0 returns bool dummies by default,
# and bool columns mixed with float columns make DataFrame.values an object
# array. uint8 is what older pandas versions produced.
dummies = pd.get_dummies(df['org'], prefix="in", dtype="uint8")
pd.set_option('display.max_columns', 9)
print(dummies)

In [None]:
# Build the modelling frame as a new object so that `df` is preserved.
#   * 'org' (original hit) is dropped after its dummy columns are appended.
#   * 'dturn' ('decision turn') is a turn delay time: a random number in
#     range(300-1000), computed after the turn type.
df2 = pd.concat([df, dummies], axis=1).drop(['org', 'dturn'], axis=1)
print(list(df2.columns))

In [None]:

from scipy.stats import zscore

# Columns replaced by their z-scores.
zscore_columns = [
    'compss',   # compass heading in degrees 0-360
    'pitch',    # pitch
    'roll',     # roll
    'cliff',    # edge detector alerts when ground is not sensed
    'lft',      # infrared sensor left
    'ctr',      # ultrasonic sensor center
    'rht',      # infrared sensor right
    'lastlft',  # time stamp of last hit
    'lastrht',  # time stamp of last hit
    'lftavg',   # average of the last 20 hits
    'rhtavg',   # average of the last 20 hits
]
for column in zscore_columns:
    df2[column] = zscore(df2[column])

# ddir or 'decision direction' is what we are trying to predict,
#          also called turntype
x_columns = df2.columns.drop('ddir')
# Force one numeric dtype: bool dummy columns mixed with float columns would
# otherwise make .values an object array.
x = df2[x_columns].values.astype('float64')
print(x_columns)

# turntype or 'decision direction' is what we are trying to classify.
# dtype is pinned because pandas >= 2.0 returns bool dummies by default.
dummies = pd.get_dummies(df2['ddir'], dtype='uint8')
turntype = dummies.columns
print(turntype)
y = dummies.values


In [None]:
# Classification neural network
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

# Seed TensorFlow's global generator so the random weight initialisation is
# repeatable. NOTE(review): full run-to-run determinism may need more than
# this; not verified here.
tf.random.set_seed(42)

# Split into train/test
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42)

# Three ReLU hidden layers (100, 50, 25 units) and a softmax output with one
# unit per target column.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(x.shape[1],)),
    tf.keras.layers.Dense(100, activation='relu', kernel_initializer='random_normal'),
    tf.keras.layers.Dense(50, activation='relu', kernel_initializer='random_normal'),
    tf.keras.layers.Dense(25, activation='relu', kernel_initializer='random_normal'),
    tf.keras.layers.Dense(y.shape[1], activation='softmax',
                          kernel_initializer='random_normal'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
monitor = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5,
                        verbose=1, mode='auto', restore_best_weights=True)
# Early stopping validates on a slice of the training data. Validating on
# (x_test, y_test) would let the test set choose the restored weights and
# make the later test scores optimistic.
model.fit(x_train, y_train, validation_split=0.2,
          callbacks=[monitor], verbose=2, epochs=1000)

In [None]:
import numpy as np
from sklearn import metrics

# Reduce each row of predicted class probabilities to the index of the
# highest one (the chosen class).
pred = np.argmax(model.predict(x_test), axis=1)

# Reduce the one-hot targets the same way so both sides hold class indexes.
y_compare = np.argmax(y_test, axis=1)

score = metrics.accuracy_score(y_compare, pred)
print("Accuracy score: {}".format(score))

In [None]:
from IPython.display import display

# Don't display numpy in scientific notation
np.set_printoptions(precision=4, suppress=True)

# Predicted class probabilities for the test rows
pred = model.predict(x_test)

print("Numpy array of predictions")
display(pred[:5])

print("As percent probability")
print(pred[0]*100)

# Log loss is computed from the probabilities, before they are collapsed
# to a single class below.
score = metrics.log_loss(y_test, pred)
print("Log loss score: {}".format(score))

# Raw probabilities to chosen class (highest probability)
pred = pred.argmax(axis=1)

In [None]:
from matplotlib.pyplot import figure, show
from numpy import arange, log, sin, pi

# Plot log(x) for x in (0, 1).
# The range starts one step above zero because log(0) is -inf and raises a
# divide-by-zero RuntimeWarning; the visible curve is unchanged.
t = arange(1e-5, 1.0, 0.00001)

fig = figure(1,figsize=(12, 10))

ax1 = fig.add_subplot(211)
ax1.plot(t, log(t))
ax1.grid(True)
ax1.set_ylim((-8, 1.5))
ax1.set_xlim((-0.1, 2))
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.set_title('log(x)')

show()

In [None]:

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics

# `pred` must already hold class indexes (argmax of the probabilities), and
# `y_compare` the class indexes of the one-hot test targets.
# Listing every class index keeps the matrix square with one row/column per
# target class, even when a class does not occur in the test split.
class_ids = np.arange(len(turntype))

# Compute confusion matrix
cm = metrics.confusion_matrix(y_compare, pred, labels=class_ids)
np.set_printoptions(precision=2)

# Normalize the confusion matrix by row (i.e. by the number of samples
# in each class). normalize='true' lets scikit-learn do the division, so a
# class with no test samples yields a row of zeros instead of NaN.
cm_normalized = metrics.confusion_matrix(
    y_compare, pred, labels=class_ids, normalize='true')
print('Normalized confusion matrix')
print(cm_normalized)
print(cm_normalized.shape)
cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm_normalized, display_labels=list(turntype))
cm_display.plot()
plt.show()