In [13]:
# Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import sklearn as sk
import scipy as sp
from scipy import signal
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeClassifier
from matplotlib.pyplot import clf
from sklearn.metrics import accuracy_score
import tensorflow as tf
import math

# improve progress bar display
import tqdm
import tqdm.auto
tqdm.tqdm = tqdm.auto.tqdm

In [14]:
# Load every trial CSV into a list of NumPy arrays (one array per trial).

filepath = "/Users/salatiemann/Downloads/Data copy/test"
# Zero-padded trial identifiers '01' through '72'; each one completes a CSV file name.
test_numbers = [str(k).zfill(2) for k in range(1, 73)]
# Column names assigned while reading (passing `names` means no header row is read).
column_names = [
    "IND", "time",
    "FSR_1", "FSR_2", "FSR_3", "FSR_4", "FSR_5", "FSR_6",
    "FSR_7", "FSR_8", "FSR_9", "FSR_10", "FSR_11", "FSR_12",
    "ACC_X", "ACC_Y", "ACC_Z",
]
data = []
for num in test_numbers:
    filename = filepath + num + '.csv'
    # Read, discard the IND column, and keep only the raw values.
    d = pd.read_csv(filename, names=column_names).drop(columns=["IND"]).to_numpy()
    data.append(d)

In [15]:
# Display (n_trials, n_samples, n_columns) for the loaded list of arrays.
np.asarray(data).shape

(72, 7001, 16)

In [16]:
# Integer class label (values 0-3) for each of the 72 trials; entries are paired by
# position with the loaded trials when the train/test split is made further down.
labels = [0,0,0,3,1,1,1,1,1,1,3,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,1,1,1,1,0,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
         1,2,3,3,3,3,3,2,2,2,0,0,1,1,2,2,3,3]

In [17]:
# Convert the label list to an ndarray so it can be sliced and compared elementwise.
# (A bare shape expression used to precede this line; it was never displayed because
# it was not the last expression in the cell.)
labels = np.asarray(labels)

In [18]:
# Print the dimensions of the loaded dataset.
print(np.asarray(data).shape)

(72, 7001, 16)


In [75]:
# Begin feature engineering

def three_quarters(column):
    """Return the fraction of samples at or below 75% of the column maximum.

    Parameters
    ----------
    column : array-like
        One-dimensional, non-empty sequence of numeric samples
        (``np.max`` raises ``ValueError`` on empty input).

    Returns
    -------
    float
        Count of entries ``<= 0.75 * max(column)`` divided by ``len(column)``.
    """
    values = np.asarray(column)
    # Compute the threshold once instead of re-scanning the column for every sample.
    threshold = .75 * np.max(values)
    return int(np.count_nonzero(values <= threshold)) / len(values)

In [144]:
# This feature takes the max of each column and subtracts the sample `offset` rows after it
# (or `offset` rows before it when the max sits too close to the end of the column).
# The difference is returned un-normalized.

def change_in_max(column, offset=2):
    """Return the drop from the column maximum to a nearby sample.

    Parameters
    ----------
    column : array-like
        One-dimensional, non-empty sequence of numeric samples.
    offset : int, optional
        Distance in samples between the maximum and the comparison sample.
        Defaults to 2 (the original note equates this with 10 ms, which assumes a
        5 ms sampling period - TODO confirm).

    Returns
    -------
    number
        ``max(column) - column[k]`` where ``k`` is ``argmax + offset`` when that index
        exists, otherwise ``argmax - offset`` clamped to the first sample.
    """
    n_samples = len(column)
    max_index = int(np.argmax(column))
    neighbor_index = max_index + offset
    if neighbor_index >= n_samples:
        # The look-ahead would run past the end of this column, so look backwards.
        # Clamp at 0 so a very short column never wraps around via a negative index.
        neighbor_index = max(max_index - offset, 0)
    diff = np.max(column) - column[neighbor_index]
    return diff

In [145]:
# Spot-check the feature on a single column: entry 71 of `data`, column index 4.
change_in_max(data[71][:,4])

7.0

In [146]:
# This function takes all N data points of size (7001,16) and puts them into a feature-engineered array.
# Resulting shape: N data points of size (5,16) --> (N,5,16), i.e. (72,5,16) for the full dataset.

def condense(array):
    """Reduce every recording to a small table of per-column summary features.

    Parameters
    ----------
    array : array-like, shape (n_recordings, n_samples, n_columns)
        Recordings to summarise; all recordings must share the same shape so that
        ``np.shape`` can report three dimensions.

    Returns
    -------
    numpy.ndarray, shape (n_recordings, 5, n_columns), dtype float64
        Feature rows, in order: mean, variance, maximum, ``three_quarters(column)``
        and ``change_in_max(column)``.

    Notes
    -----
    Prints each recording index (space separated) as a progress indicator.
    """
    len_dataset, _, number_of_columns = np.shape(array)
    number_of_features = 5
    condensed_data = np.zeros((len_dataset, number_of_features, number_of_columns))
    # Walk over however many recordings were passed in, not a fixed count.
    for j in range(len_dataset):
        data_point = array[j]
        for i in range(number_of_columns):
            column = data_point[:, i]
            condensed_data[j, 0, i] = np.mean(column)
            condensed_data[j, 1, i] = np.var(column)
            condensed_data[j, 2, i] = np.max(column)
            # feature: fraction of data points that are at or below 75% of the max
            condensed_data[j, 3, i] = three_quarters(column)
            condensed_data[j, 4, i] = change_in_max(column)
        print(j, end=" ")
    return condensed_data

In [147]:
# Build the feature array for every trial, then display its shape.
# (A bare `featured_data` expression used to sit between these lines; it was never
# displayed because it was not the last expression in the cell.)
featured_data = condense(data)
featured_data.shape

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 

(72, 5, 16)

In [148]:
# Flatten each (features, columns) table into one feature vector per sample.
nsamples, nx, ny = featured_data.shape
reformated_data = featured_data.reshape(nsamples, nx * ny)

# First 64 samples are used for training; samples 64-71 are held out for testing.
train, test = reformated_data[:64], reformated_data[64:72]
train_labels, test_labels = labels[:64], labels[64:72]

In [149]:
# Aliases for the split arrays; they refer to the same objects as the originals.
X_train, X_test, y_train, y_test = train, test, train_labels, test_labels

# Fit a Gaussian naive Bayes classifier on the training split.
model = GaussianNB()
model.fit(train, train_labels)

# Show the predictions next to the true labels for the held-out samples.
print("Predicted labels = " + str(model.predict(test)))
print("Correct labels =   " + str(test_labels))

Predicted labels = [0 0 1 1 1 1 1 3]
Correct labels =   [0 0 1 1 2 2 3 3]


In [133]:
# Fraction of held-out samples whose predicted label equals the true label.
a = model.predict(test)
total = sum(1 for predicted, actual in zip(a, test_labels) if predicted == actual)
percent_correct = total / len(a)
percent_correct

0.625

In [None]:
# TensorFlow practice: wrap the feature matrices in tf.data datasets (features only).

batch_size = 12
train_dataset, test_dataset = (
    tf.data.Dataset.from_tensor_slices(split) for split in (train, test)
)

In [None]:
# Pair each feature vector with its label, repeat indefinitely, and batch.
# The split arrays are passed in directly: unpacking them into a variable called
# `labels` would replace the notebook-level array of all trial labels, and re-running
# the train/test split cell afterwards would then slice the wrong array.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train, train_labels)).repeat().batch(batch_size)
)

test_dataset = (
    tf.data.Dataset.from_tensor_slices((test, test_labels)).repeat().batch(batch_size)
)

In [None]:
# Create iterators over the batched datasets and pull the first batch from each.
train_iter = iter(train_dataset)
test_iter = iter(test_dataset)

train_x, train_y = train_iter.get_next()
test_x, test_y = test_iter.get_next()

In [None]:
# test_dataset is built with an unbounded .repeat(), so materialising the whole
# iterator would never finish; look at a single batch instead.
list(test_dataset.take(1).as_numpy_iterator())

In [None]:
# Pull one batch for inspection. Batch-specific names are used so the notebook-level
# `labels` array (one label per trial) is not replaced by a batch of labels.
batch_features, batch_labels = next(iter(train_dataset))

In [None]:
# Bare expression: the notebook displays the dataset object itself.
test_dataset

In [None]:
# NOTE(review): tf.data.Dataset does not appear to provide a `values()` method, so this
# cell presumably raises AttributeError - confirm against the TensorFlow API, then
# remove or replace it.
test_dataset.values()

In [None]:
def pack_features_vector(features, labels):
    """Pack the features into a single array.

    Parameters
    ----------
    features : dict of tensors, or an already-packed value
        When a dict (one entry per feature column), its values are stacked along
        axis 1 with ``tf.stack``. Anything else is treated as already packed and is
        returned unchanged.
    labels
        Passed through untouched.

    Returns
    -------
    tuple
        ``(features, labels)``.
    """
    if isinstance(features, dict):
        # `axis` is an argument of tf.stack; list() accepts no keyword arguments.
        features = tf.stack(list(features.values()), axis=1)
    return features, labels

In [None]:
# Apply pack_features_vector to every (features, labels) element of the dataset.
# NOTE: train_dataset is rebound to its own mapped version, so re-running this cell
# layers another map on top of the previous one.
train_dataset = train_dataset.map(pack_features_vector)

In [None]:
# # make a simple model
# net = tf.keras.layers.Dense(train_x, 8) # pass the first value from iter.get_next() as input
# net = tf.keras.layers.dense(net, 8)
# prediction = tf.layers.dense(net, 1)
# loss = tf.losses.mean_squared_error(prediction, train_y) # pass the second value from iter.get_next() as label
# train_op = tf.train.AdamOptimizer().minimize(loss)

In [None]:
# Shape of a single training feature vector. The value is discarded because this is
# not the last expression in the cell.
np.shape(train[1])

# Standalone Dense layer with one unit; not referenced again in the visible notebook.
l0 = tf.keras.layers.Dense(units=1, input_shape=[1])

In [None]:
# Small Keras classifier over the engineered feature vectors.
# - Flatten takes no `units` argument, and `input_shape` must leave out the batch
#   dimension, so the per-sample shape is read from the training matrix.
# - The 10 output units are kept as written; the labels in this notebook only take
#   the values 0-3, so 4 units would be enough.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=train.shape[1:]),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

In [None]:
# Integer class labels -> sparse categorical cross-entropy; track accuracy while training.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Batch size constant (same value as `batch_size` defined for the tf.data cells).
BATCH_SIZE = 12
# train = train.repeat().shuffle(62).batch(BATCH_SIZE)
# test = test.batch(BATCH_SIZE)

In [None]:
# Train for one pass over the training split. The targets must be passed alongside
# the features; without them the loss has nothing to compare the predictions to.
# steps_per_epoch covers the whole split once at the chosen batch size.
model.fit(
    train,
    train_labels,
    batch_size=BATCH_SIZE,
    epochs=1,
    steps_per_epoch=math.ceil(len(train) / BATCH_SIZE),
)