<a href="https://colab.research.google.com/github/zamalex/BakingFinal/blob/master/k%20fold%20uci%20har(RF%20version).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import sklearn.metrics
from tensorflow.keras.utils import plot_model
import datetime


In [None]:
%load_ext tensorboard


# Some methods and data which will be used below

In [2]:
# (i).   Mapping labels to their resp. classes
# (ii).  Mapping classes to their resp. labels

label_to_class = {
    1  : 'WALKING',
    2  : 'WALKING_UPSTAIRS',
    3  : 'WALKING_DOWNSTAIRS',
    4  : 'SITTING',
    5  : 'STANDING',
    6  : 'LAYING',
    7  : 'STAND_TO_SIT',
    8  : 'SIT_TO_STAND',
    9  : 'SIT_TO_LIE',
    10 : 'LIE_TO_SIT',
    11 : 'STAND_TO_LIE',
    12 : 'LIE_TO_STAND',
    np.nan : np.nan
}
class_to_label = {
    'WALKING' : 1,
    'WALKING_UPSTAIRS' : 2,
    'WALKING_DOWNSTAIRS' : 3,
    'SITTING' : 4,
    'STANDING' : 5,
    'LAYING' : 6,
    'STAND_TO_SIT' : 7,
    'SIT_TO_STAND' : 8,
    'SIT_TO_LIE' : 9,
    'LIE_TO_SIT' : 10,
    'STAND_TO_LIE' : 11,
    'LIE_TO_STAND' : 12,
    np.nan : np.nan
}

In [3]:
# (i).   Removing data-points where y and x values is null

# Other methods can be
# ffill (forward fill) => fills using forward points
# bfill (backward fill) => using backward points
# interpolate

def remove_null(xdata,ydata):
    xdata = xdata[np.where(np.isfinite(ydata))]
    ydata = ydata[np.where(np.isfinite(ydata))]
    ydata = ydata[np.where(np.isfinite(xdata).all(axis = 1).all(axis = 1).all(axis = 1))]
    xdata = xdata[np.where(np.isfinite(xdata).all(axis = 1).all(axis = 1).all(axis = 1))]

    return xdata,ydata

In [4]:
# normalize xdata using sklearn.preprocessing.StandardScaler and returns
# scaler object to use it furthur for testing data

# Each axis of each sensor has different min, max, I scaled according to them seperately
# Initial shape == (None,128,2,3)
# changed to (None , 6) :-
# reshape to (None,128,6) -> swapaxis(0,2) -> reshape(6,-1) -> transpose
# Fit scaler OR transform according to scaler

# Reverse above process to get back oiginal data
# transpose -> reshape(6,128,None) -> swapaxes(0,2) -> reshape(None,128,2,3)

def get_scaler(xdata):

    row = xdata.shape[0]
    timestamp = xdata.shape[1]
    sensor = xdata.shape[2]
    axis = xdata.shape[3]

    scaler = MinMaxScaler(feature_range = (-1,1))
    xdata = xdata.reshape(row,timestamp,sensor*axis)
    xdata = np.swapaxes(xdata,0,2).reshape(sensor*axis,-1).T
    scaler.fit(xdata)
    return scaler

def scale_data(xdata,scaler):

    row = xdata.shape[0]
    timestamp = xdata.shape[1]
    sensor = xdata.shape[2]
    axis = xdata.shape[3]

    xdata = xdata.reshape(row,timestamp,sensor*axis)
    xdata = np.swapaxes(xdata,0,2).reshape(sensor*axis,-1).T
    xdata = scaler.transform(xdata)
    xdata = xdata.T.reshape(sensor*axis,timestamp,row)
    xdata = np.swapaxes(xdata,0,2).reshape(row,timestamp,sensor,axis)

    return xdata

In [5]:
# takes in location, exp no., user no., start and end(end point is excluded from reading i.e lastpoint+1) point
# ,overlap array, and returns xdata and ydata

def create_windows(location, exp, user, start, end, activity, length, overlap):

    acc_file  = location + '/acc_exp'+ str(exp).zfill(2) + '_user' + str(user).zfill(2) + '.txt'
    gyro_file = location + '/gyro_exp'+ str(exp).zfill(2) + '_user' + str(user).zfill(2) + '.txt'

    acc_data  = np.loadtxt(acc_file)
    gyro_data = np.loadtxt(gyro_file)

    xtrain = []
    ytrain = []

    while (start + length <= end) :

        stop = start + length
        window = []

        while start != stop :
            window.append( [acc_data[start] , gyro_data[start]] )
            start += 1

        xtrain.append(window)
        ytrain.append(activity)

        start = stop - overlap[activity-1]

    return xtrain,ytrain

In [6]:
# location == location of file
# lenght == lenght of window
# overlap == array of overlaps of size == number of unique activities
# overlap depends on activity so as to extract more data from a particular class if needed


# (i).   Loading labels.txt as labels
# (ii).  Iterating in labels and calling create_windows on acceleration file, extending returned data in xtrain, ytrain
# (iii). Iterating in labels and calling create_windows on gyroscope file, extending returned data in xtrain, ytrain

def prepare_data(location,length = 128,overlap = [64]*12):

    xdata = []
    ydata = []

    labels = np.loadtxt(location+'/labels.txt',dtype = 'uint32')

    for exp,user,activity,start,end in labels :

        xtemp , ytemp = create_windows(location, exp, user, start, end+1, activity, length, overlap)
        xdata.extend(xtemp)
        ydata.extend(ytemp)

    return np.array(xdata),np.array(ydata)

In [7]:
# (i). Finds max element index sets its 1 and sets remaining 0
#      for each row

def to_categorical(ydata):

    for i in range(len(ydata)):
        j = ydata[i].argmax()
        for k in range(len(ydata[i])):
            ydata[i][k] = (k == j)
    return ydata

In [8]:
# (i).  OneHotEncoding ydata
# (ii). Converting sparsh matrix ydata into dense form and then matrix into numpy array

def one_hot_encoded(ydata):
    ydata = OneHotEncoder().fit_transform(ydata.reshape(len(ydata),1))
    ydata = np.asarray(ydata.todense())
    return ydata

def splitData(xtrain,ytrain,type):
  if(type=='master'):
    xtrain = xtrain[(ytrain==1) | (ytrain==5)|(ytrain==7)]
    ytrain = ytrain[(ytrain==1) | (ytrain==5)|(ytrain==7)]
  elif(type=='dynamic'):
    xtrain = xtrain[(ytrain==1) | (ytrain==2)|(ytrain==3)]
    ytrain = ytrain[(ytrain==1) | (ytrain==2)|(ytrain==3)]
  elif(type=='static'):
    xtrain = xtrain[(ytrain==4) | (ytrain==5)|(ytrain==6)]
    ytrain = ytrain[(ytrain==4) | (ytrain==5)|(ytrain==6)]
  elif(type=='transition'):
    xtrain = xtrain[(ytrain==7) | (ytrain==8)|(ytrain==9)|(ytrain==10)|(ytrain==11)|(ytrain==12)]
    ytrain = ytrain[(ytrain==7) | (ytrain==8)|(ytrain==9)|(ytrain==10)|(ytrain==11)|(ytrain==12)]
  elif(type=='primary'):
    xtrain = xtrain[(ytrain==1) | (ytrain==2)|(ytrain==3)|(ytrain==4)|(ytrain==5)|(ytrain==6)]
    ytrain = ytrain[(ytrain==1) | (ytrain==2)|(ytrain==3)|(ytrain==4)|(ytrain==5)|(ytrain==6)]
  else:
    xtrain = xtrain
    ytrain = ytrain

  #xtrain,xtest,ytrain,ytest = train_test_split(xtrain,ytrain,test_size = 0.3,random_state=42)
  labels = ytrain


  ytrain = one_hot_encoded(ytrain)
  return xtrain,ytrain,labels

def splitData2(xtrain,ytrain,type):
  if(type=='master'):
    xtrain = xtrain[(ytrain==1) | (ytrain==5)|(ytrain==7)]
    ytrain = ytrain[(ytrain==1) | (ytrain==5)|(ytrain==7)]
  elif(type=='dynamic'):
    xtrain = xtrain[(ytrain==1) | (ytrain==2)|(ytrain==3)]
    ytrain = ytrain[(ytrain==1) | (ytrain==2)|(ytrain==3)]
  elif(type=='static'):
    xtrain = xtrain[(ytrain==4) | (ytrain==5)|(ytrain==6)]
    ytrain = ytrain[(ytrain==4) | (ytrain==5)|(ytrain==6)]
  elif(type=='transition'):
    xtrain = xtrain[(ytrain==7) | (ytrain==8)|(ytrain==9)|(ytrain==10)|(ytrain==11)|(ytrain==12)]
    ytrain = ytrain[(ytrain==7) | (ytrain==8)|(ytrain==9)|(ytrain==10)|(ytrain==11)|(ytrain==12)]
  elif(type=='primary'):
    xtrain = xtrain[(ytrain==1) | (ytrain==2)|(ytrain==3)|(ytrain==4)|(ytrain==5)|(ytrain==6)]
    ytrain = ytrain[(ytrain==1) | (ytrain==2)|(ytrain==3)|(ytrain==4)|(ytrain==5)|(ytrain==6)]
  else:
    xtrain = xtrain
    ytrain = ytrain

  #xtrain,xtest,ytrain,ytest = train_test_split(xtrain,ytrain,test_size = 0.3,random_state=42)

  return xtrain,ytrain


def resampleData(x,y):
  x = x.reshape(5773,768)
  df = pd.DataFrame(x)

  from imblearn.over_sampling import RandomOverSampler
  from collections import Counter
  from sklearn.datasets import make_classification
  from imblearn.over_sampling import SMOTE

  oversample = SMOTE()
  df, y = oversample.fit_resample(df, y)
  df=df.to_numpy()
  x=df.reshape(12228,128,6)
  x=df.reshape(12228,128,2,3)
  return x,y

# Data Preparation

## Loading data from files

In [9]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
# load a single file as a numpy array
def load_file(filepath):
    dataframe = read_csv(filepath, header=None, delim_whitespace=True)
    return dataframe.values

# load a list of files and return as a 3d numpy array
def load_group(filenames, prefix=''):
	loaded = list()
	for name in filenames:
		data = load_file(prefix + name)
		loaded.append(data)
	# stack group so that features are the 3rd dimension
	loaded = dstack(loaded)
	return loaded

# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
	filepath = prefix + group + '/Inertial Signals/'
	# load all 9 files as a single array
	filenames = list()
	# total acceleration
	filenames += ['total_acc_x_'+group+'.txt', 'total_acc_y_'+group+'.txt', 'total_acc_z_'+group+'.txt']
	# body acceleration
	filenames += ['body_acc_x_'+group+'.txt', 'body_acc_y_'+group+'.txt', 'body_acc_z_'+group+'.txt']
	# body gyroscope
	filenames += ['body_gyro_x_'+group+'.txt', 'body_gyro_y_'+group+'.txt', 'body_gyro_z_'+group+'.txt']
	# load input data
	X = load_group(filenames, filepath)
	# load class output
	y = load_file(prefix + group + '/y_'+group+'.txt')
	return X, y
#/content/drive/MyDrive/UCI HAR Dataset
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    # load all train
    trainX, trainy = load_dataset_group('train', prefix + '/content/drive/MyDrive/UCI HAR Dataset/')
    # load all test
    testX, testy = load_dataset_group('test', prefix + '/content/drive/MyDrive/UCI HAR Dataset/')

    trainylabels = trainy
    testylabels = testy

    # zero-offset class values
    trainy = trainy - 1
    testy = testy - 1
    # one hot encode y
    trainy = to_categorical(trainy)
    testy = to_categorical(testy)
    print("trainX shape: ",trainX.shape)
    print("trainy shape: ", trainy.shape)
    print("testX shape: ", testX.shape)
    print("testy shape: ", testy.shape)
    print("trainylabels shape: ", trainylabels.shape)
    print("testylabels shape: ", testylabels.shape)

    return trainX, trainy, testX, testy ,trainylabels,testylabels


In [11]:
import numpy as np
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
#from keras.layers.convolutional import Conv1D
#from keras.layers.convolutional import MaxPooling1D
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot
import matplotlib.pyplot as plt
#from keras.utils.vis_utils import plot_model
import keras
import zipfile
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

trainX, trainy, testX, testy, trainylabels , testylabels = load_dataset()

trainX shape:  (7352, 128, 9)
trainy shape:  (7352, 6)
testX shape:  (2947, 128, 9)
testy shape:  (2947, 6)
trainylabels shape:  (7352, 1)
testylabels shape:  (2947, 1)


In [12]:
X = np.concatenate((trainX,testX))
Y= np.concatenate((trainy,testy))
YLabels = np.concatenate((trainylabels,testylabels))

In [13]:
X.shape,Y.shape,YLabels.shape

((10299, 128, 9), (10299, 6), (10299, 1))

In [14]:
yy = YLabels
yy.shape

(10299, 1)

In [15]:
yy =yy.reshape(10299,)

In [16]:
yy

array([5, 5, 5, ..., 2, 2, 2])

**Machine Learning Algorithms**

In [110]:
X_train = pd.read_csv('./sample_data/ucihar72features.csv', header=None)
X_train=X_train.iloc[1: , :]
Y_train = YLabels.reshape(10299,)

  X_train = pd.read_csv('./sample_data/ucihar72features.csv', header=None)


In [111]:
X_train=X_train.reset_index(level=None, drop=True)
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,62,63,64,65,66,67,68,69,70,71
0,1.0193349999999999,1.0193045703124999,0.001790556184033647,3.2060914481811355e-06,1.0193061429969892,1.024606,1.024606,1.012817,-0.12430179999999999,-0.12426187265625,...,0.07485018,-0.009021585,0.0089209935,0.00946815436515625,0.005943238143615174,3.5322079631722343e-05,0.01117890990723826,0.02285864,0.02285864,-0.006890512
1,1.019366,1.01944796875,0.0019319124427684116,3.7322856865234115e-06,1.0194497992908662,1.024606,1.024606,1.012893,-0.1231878,-0.12293556953125001,...,0.03016049,-0.03016049,0.0064976159999999995,0.0063075711740625,0.007430688189073518,5.521512696323667e-05,0.009746824153492298,0.02513287,0.02513287,-0.009759107
2,1.0201324999999999,1.0199265078125,0.0029039851228422242,8.433129593688969e-06,1.0199306419890497,1.027664,1.027664,1.009013,-0.1259181,-0.12499678124999998,...,0.03750257,-0.03750257,0.0024673810000000003,0.004334764210234375,0.008753414795710247,7.662227058575905e-05,0.009767929736852528,0.02860941,0.02860941,-0.009759107
3,1.0204654999999998,1.0203490546874998,0.0025741396600023106,6.626194989196811e-06,1.0203523017059668,1.027664,1.027664,1.009013,-0.1307196,-0.13049271093749998,...,0.03750257,-0.03750257,-0.0018162389999999999,-0.00035190115179687504,0.008186481257612248,6.701847538123661e-05,0.00819404111546144,0.02860941,0.02860941,-0.01322325
4,1.0202010000000001,1.0202553437500002,0.0019486496223246328,3.7972353505859337e-06,1.0202572046723224,1.026194,1.026194,1.013645,-0.13027640000000001,-0.13020888671875,...,0.02058165,-0.02058165,-0.003654157,-0.003957091590625,0.008259952032748819,6.822680758331133e-05,0.009158896300314053,0.01989048,0.02114078,-0.02114078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10294,0.909141,0.972528,0.231131,0.053421,0.999615,1.724845,1.724845,0.633686,-0.246933,-0.277329,...,1.347634,-1.347634,0.107912,0.051575,0.360653,0.130071,0.364322,0.607194,0.95268,-0.95268
10295,0.917021,0.983719,0.225604,0.050897,1.009257,1.724845,1.724845,0.673112,-0.246097,-0.268153,...,1.347634,-1.347634,0.143734,0.012066,0.417144,0.174009,0.417319,0.667866,0.95268,-0.95268
10296,0.916196,0.980682,0.217645,0.047369,1.004543,1.604132,1.604132,0.663282,-0.213419,-0.232996,...,0.945282,-0.815394,0.192875,0.077493,0.389231,0.151501,0.39687,0.667866,0.857202,-0.857202
10297,0.901866,0.958825,0.219793,0.048309,0.983694,1.604132,1.604132,0.635888,-0.212598,-0.225295,...,1.093888,-1.093888,0.194982,0.118935,0.319241,0.101915,0.340676,0.602909,0.709417,-0.709417


In [112]:
folds=3

In [113]:
X.shape,X_train.shape

((10299, 128, 9), (10299, 72))

In [114]:
x11train,y11train=splitData2(X_train,Y_train,'dynamic')
x22train,y22train=splitData2(X_train,Y_train,'static')
x11train=x11train.reset_index(level=None, drop=True)
x22train=x22train.reset_index(level=None, drop=True)

dynamicxtrain1,dynamicytrain1,dynamiclabels1 = splitData(X,yy,'dynamic')
staticxtrain1,staticytrain1,staticlabels1 = splitData(X,yy,'static')


In [115]:
dynamicxtrain1.shape,x11train.shape

((4672, 128, 9), (4672, 72))

In [116]:
import numpy as np
from sklearn.model_selection import KFold
dynamic_fold_train=[]
dynamic_fold_test=[]
kf = KFold(n_splits=folds)
kf.get_n_splits(dynamicxtrain1)
print(kf)
for i, (train_index, test_index) in enumerate(kf.split(dynamicxtrain1)):
    dynamic_fold_train.append(train_index)
    dynamic_fold_test.append(test_index)

static_fold_train=[]
static_fold_test=[]
kf = KFold(n_splits=folds)
kf.get_n_splits(staticxtrain1)
print(kf)
for i, (train_index, test_index) in enumerate(kf.split(staticxtrain1)):
    static_fold_train.append(train_index)
    static_fold_test.append(test_index)


KFold(n_splits=3, random_state=None, shuffle=False)
KFold(n_splits=3, random_state=None, shuffle=False)


In [117]:
import numpy as np
from sklearn.model_selection import KFold
dynamic_fold_train2=[]
dynamic_fold_test2=[]
kf = KFold(n_splits=folds)
kf.get_n_splits(x11train)
print(kf)
for i, (train_index, test_index) in enumerate(kf.split(x11train)):
    dynamic_fold_train2.append(train_index)
    dynamic_fold_test2.append(test_index)
    print(f"Fold {i}:")
    print(f"  Train: index={train_index}")
    print(f"  Test:  index={test_index}")

static_fold_train2=[]
static_fold_test2=[]
kf = KFold(n_splits=folds)
kf.get_n_splits(x22train)
print(kf)
for i, (train_index, test_index) in enumerate(kf.split(x22train)):
    static_fold_train2.append(train_index)
    static_fold_test2.append(test_index)

KFold(n_splits=3, random_state=None, shuffle=False)
Fold 0:
  Train: index=[1558 1559 1560 ... 4669 4670 4671]
  Test:  index=[   0    1    2 ... 1555 1556 1557]
Fold 1:
  Train: index=[   0    1    2 ... 4669 4670 4671]
  Test:  index=[1558 1559 1560 ... 3112 3113 3114]
Fold 2:
  Train: index=[   0    1    2 ... 3112 3113 3114]
  Test:  index=[3115 3116 3117 ... 4669 4670 4671]
KFold(n_splits=3, random_state=None, shuffle=False)


In [108]:
x11train.iloc[1558]

0       0.9031991500000001
1       0.9217707898437499
2      0.20980772760598976
3     0.044019282563189197
4       0.9453468525215282
              ...         
67     0.11082189811148971
68      0.3335200405397904
69               0.5896611
70               0.7846509
71              -0.7846509
Name: 1558, Length: 72, dtype: object

In [109]:
x11train.iloc[dynamic_fold_train2[0]]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,62,63,64,65,66,67,68,69,70,71
1558,0.9031991500000001,0.9217707898437499,0.20980772760598976,0.044019282563189197,0.9453468525215282,1.399791,1.399791,0.5777828,0.17023225,0.17352569003359375,...,1.068341,-0.7894977,-0.019498345,-0.0203400916953125,0.3328992311668648,0.11082189811148971,0.3335200405397904,0.5896611,0.7846509,-0.7846509
1559,0.8841147,0.9186416109375,0.21876554081955074,0.047858361850070516,0.9443308589662395,1.351825,1.351825,0.5443351,0.17389415000000003,0.18276287601562502,...,0.9961656,-0.8954826,0.029533335,0.0717680771484375,0.3193787606268166,0.10200279273952141,0.32734301525632936,0.6685366,0.6685366,-0.5085973
1560,0.9021807500000001,0.9225410703125,0.21239936556921798,0.04511349049420631,0.9466760358789797,1.330132,1.330132,0.5443351,0.18213865,0.1909557237890625,...,0.8954826,-0.8954826,0.11816090000000001,0.1260545553203125,0.2944837637129044,0.08672068709051771,0.32032864062946265,0.6685366,0.6685366,-0.4186424
1561,0.9824951,0.9929174578125,0.16631864120315182,0.027661890411662747,1.0067506982567733,1.486766,1.486766,0.7271095,-0.055939875,-0.07004651406250001,...,0.8631198,-0.8631198,-0.2066461,-0.1630999271875,0.23820805880148888,0.05674307927797358,0.2886947618619731,0.4147495,0.7104123,-0.7104123
1562,0.9695486,0.995752475,0.18463815696801955,0.03409124900854703,1.0127261428822565,1.497828,1.497828,0.7043009,-0.029604230000000002,-0.0657991745078125,...,0.8897563,-0.8631198,-0.03294022,-0.05242599680468749,0.2743479689766434,0.07526680808160927,0.27931217879386205,0.4944997,0.7104123,-0.7104123
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4667,0.909141,0.972528,0.231131,0.053421,0.999615,1.724845,1.724845,0.633686,-0.246933,-0.277329,...,1.347634,-1.347634,0.107912,0.051575,0.360653,0.130071,0.364322,0.607194,0.95268,-0.95268
4668,0.917021,0.983719,0.225604,0.050897,1.009257,1.724845,1.724845,0.673112,-0.246097,-0.268153,...,1.347634,-1.347634,0.143734,0.012066,0.417144,0.174009,0.417319,0.667866,0.95268,-0.95268
4669,0.916196,0.980682,0.217645,0.047369,1.004543,1.604132,1.604132,0.663282,-0.213419,-0.232996,...,0.945282,-0.815394,0.192875,0.077493,0.389231,0.151501,0.39687,0.667866,0.857202,-0.857202
4670,0.901866,0.958825,0.219793,0.048309,0.983694,1.604132,1.604132,0.635888,-0.212598,-0.225295,...,1.093888,-1.093888,0.194982,0.118935,0.319241,0.101915,0.340676,0.602909,0.709417,-0.709417


In [None]:
#X_train = np.concatenate([x1train,x2train], axis=0)
#X_test = np.concatenate([x1test,x2test], axis=0)

#Y_train = np.concatenate([y1train,y2train], axis=0)
#Y_test = np.concatenate([y1test,y2test], axis=0)

In [121]:
from sklearn.ensemble import RandomForestClassifier
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
from matplotlib import pyplot

accs=[]
for fold in range(folds):


  #dynamicxtrain,dynamicxtest,dynamicytrain,dynamicytest,dynamiclabels
  dynamicxtest = dynamicxtrain1[dynamic_fold_test[fold]]
  dynamicytest = dynamicytrain1[dynamic_fold_test[fold]]
  dynamicytrain = dynamicytrain1[dynamic_fold_train[fold]]
  dynamicxtrain = dynamicxtrain1[dynamic_fold_train[fold]]
  dynamiclabels = dynamiclabels1[dynamic_fold_test[fold]]

  #staticxtrain,staticxtest,staticytrain,staticytest,staticlabels

  staticxtest = staticxtrain1[static_fold_test[fold]]
  staticytest = staticytrain1[static_fold_test[fold]]
  staticytrain = staticytrain1[static_fold_train[fold]]
  staticxtrain = staticxtrain1[static_fold_train[fold]]
  staticlabels = staticlabels1[static_fold_test[fold]]
##################x2train,y2train##################################################
  x1test = x11train.iloc[dynamic_fold_test2[fold]]
  y1test = y11train[dynamic_fold_test2[fold]]
  y1train = y11train[dynamic_fold_train2[fold]]
  x1train = x11train.iloc[dynamic_fold_train2[fold]]

  x2test = x22train.iloc[static_fold_test2[fold]]
  y2test = y22train[static_fold_test2[fold]]
  y2train = y22train[static_fold_train2[fold]]
  x2train = x22train.iloc[static_fold_train2[fold]]


  from sklearn.ensemble import RandomForestClassifier
  from sklearn.metrics import confusion_matrix, accuracy_score

  tree_list = [100]
  scores=[]
  for n_trees in tree_list:

    rf = RandomForestClassifier(n_estimators=n_trees, random_state=40)
    rf.fit(x2train, y2train)
    rf_pred = rf.predict(x2test)
    scores.append(accuracy_score(y2test, rf_pred))
    print('Accuracy score TEST: {:.3f}'.format(accuracy_score(y2test, rf_pred)*100))

  timesteps = len(dynamicxtrain[0])
  input_dim = len(dynamicxtrain[0][0])
  n_classes =3

  print(timesteps)
  print(input_dim)
  print(len(X_train))

  epochs = 3#100
  batch_size = 16
  n_hidden = 64
  model1 = Sequential()
  # Configuring the parameters
  model1.add(LSTM(n_hidden, input_shape=(timesteps, input_dim), return_sequences=True))
  # Adding a dropout layer
  model1.add(Dropout(0.5))
  model1.add(LSTM(32))
  model1.add(Dropout(0.5))
  model1.add(Dense(n_classes, activation='softmax'))
  model1.summary()

  model1.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
  model1.fit(dynamicxtrain,
          dynamicytrain,
          batch_size=batch_size,
          #validation_data=(dynamicxtest, dynamicytest),
          epochs=epochs,
          )

  X_train = np.concatenate([x1train,x2train], axis=0)
  X_test = np.concatenate([x1test,x2test], axis=0)

  yexpect = np.concatenate([dynamiclabels,staticlabels], axis=0)
  merge_arr = np.concatenate([dynamicxtest, staticxtest], axis=0)
  scores=[]
  dynamiccount=0
  staticcount=0
  transitioncount=0
  dynamics = np.array([1,2,3])
  statics = np.array([4,5,6])
  transitions = np.array([7,8,9,10,11,12])
  for i in range(len(merge_arr)):#len(xtest)
      row = merge_arr[i].reshape(1,128,9)
      if(yexpect[i] in [1,2,3]):
          predict_x=model1.predict(row)

          pred=dynamics[np.argmax(predict_x,axis=1)]
          scores.append(pred[0])

      elif(yexpect[i] in [4,5,6]):
        s = rf.predict(X_test[i].reshape(1, -1))  #x2test.loc[0,:].to_numpy().reshape(1,-1)
        scores.append(s[0])


      print(i)
  print('dynamic count '+str(dynamiccount)+' static count '+str(staticcount)+' transition count '+str(transitioncount))
  from sklearn.metrics import confusion_matrix, accuracy_score


  print('Accuracy score TEST: {:.3f}'.format(accuracy_score(yexpect, scores)*100))
  accs.append(accuracy_score(yexpect, scores)*100)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Trainable params: 31459 (122.89 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/3
Epoch 2/3
Epoch 3/3
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
2

In [122]:
accs,mean(accs)

([86.75014560279557, 93.85377221089426, 91.87062937062937], 90.82484906143974)

**hierarchical uci har**

In [None]:


# LSTM only: 97.15
#optimal : 97.347
# rf_pred: 97.347

**Generate Dataset For traditional models**

In [None]:
X.shape

(10299, 128, 9)

In [None]:
x= X.reshape(10299*128,9)

In [None]:
a=[]
for i in range(0,10299):
    a=a+[i]*128

In [None]:
len(a)

1318272

In [None]:
x= pd.DataFrame(x)

In [None]:
x['id'] = a

Unnamed: 0,0,1,2,3,4,5,6,7,8,id
0,1.012817,-0.123217,0.102934,0.000181,0.010767,0.055561,0.030191,0.066014,0.022859,0
1,1.022833,-0.126876,0.105687,0.010139,0.006579,0.055125,0.043711,0.042699,0.010316,0
2,1.022028,-0.124004,0.102102,0.009276,0.008929,0.048405,0.035688,0.074850,0.013250,0
3,1.017877,-0.124928,0.106553,0.005066,0.007489,0.049775,0.040402,0.057320,0.017751,0
4,1.023680,-0.125767,0.102814,0.010810,0.006141,0.043013,0.047097,0.052343,0.002553,0
...,...,...,...,...,...,...,...,...,...,...
1318267,0.908386,-0.423054,-0.092933,-0.061667,-0.175584,0.151117,0.148295,-0.015923,0.109040,10298
1318268,0.898984,-0.392272,-0.063138,-0.070890,-0.145071,0.181814,0.143136,-0.024389,0.006547,10298
1318269,0.918862,-0.351680,-0.072539,-0.050755,-0.104717,0.173271,0.095931,-0.021024,-0.051342,10298
1318270,0.949475,-0.267526,-0.050975,-0.019807,-0.020764,0.195638,0.090708,-0.041893,-0.078877,10298


In [None]:
from tsfresh.feature_extraction import extract_features, MinimalFCParameters, EfficientFCParameters

minimalFCParametersForTsFresh = MinimalFCParameters()
extracted_features = extract_features(x,column_id="id",default_fc_parameters = minimalFCParametersForTsFresh)

Feature Extraction: 100%|██████████| 92691/92691 [00:53<00:00, 1726.01it/s]


In [None]:
extracted_features.columns

Index(['0__median', '0__mean', '0__standard_deviation', '0__variance',
       '0__root_mean_square', '0__maximum', '0__absolute_maximum',
       '0__minimum', '1__median', '1__mean', '1__standard_deviation',
       '1__variance', '1__root_mean_square', '1__maximum',
       '1__absolute_maximum', '1__minimum', '2__median', '2__mean',
       '2__standard_deviation', '2__variance', '2__root_mean_square',
       '2__maximum', '2__absolute_maximum', '2__minimum', '3__median',
       '3__mean', '3__standard_deviation', '3__variance',
       '3__root_mean_square', '3__maximum', '3__absolute_maximum',
       '3__minimum', '4__median', '4__mean', '4__standard_deviation',
       '4__variance', '4__root_mean_square', '4__maximum',
       '4__absolute_maximum', '4__minimum', '5__median', '5__mean',
       '5__standard_deviation', '5__variance', '5__root_mean_square',
       '5__maximum', '5__absolute_maximum', '5__minimum', '6__median',
       '6__mean', '6__standard_deviation', '6__variance',
   

In [None]:
extracted_features = extracted_features.drop(['0__length'], axis=1)
extracted_features = extracted_features.drop(['1__length'], axis=1)
extracted_features = extracted_features.drop(['2__length'], axis=1)
extracted_features = extracted_features.drop(['3__length'], axis=1)
extracted_features = extracted_features.drop(['4__length'], axis=1)
extracted_features = extracted_features.drop(['5__length'], axis=1)
extracted_features = extracted_features.drop(['0__sum_values'], axis=1)
extracted_features = extracted_features.drop(['1__sum_values'], axis=1)
extracted_features = extracted_features.drop(['2__sum_values'], axis=1)
extracted_features = extracted_features.drop(['3__sum_values'], axis=1)
extracted_features = extracted_features.drop(['4__sum_values'], axis=1)
extracted_features = extracted_features.drop(['5__sum_values'], axis=1)

In [None]:
extracted_features = extracted_features.drop(['6__length'], axis=1)
extracted_features = extracted_features.drop(['7__length'], axis=1)
extracted_features = extracted_features.drop(['8__length'], axis=1)
extracted_features = extracted_features.drop(['6__sum_values'], axis=1)
extracted_features = extracted_features.drop(['7__sum_values'], axis=1)
extracted_features = extracted_features.drop(['8__sum_values'], axis=1)

In [None]:
extracted_features.to_csv('ucihar72features.csv',index=False)
