In [1]:
import pandas as pd
import numpy as np
import os
import copy
import math
from pathlib import Path
# from google.colab import drive
# drive.mount("/content/gdrive", force_remount = True)

# **To Label the Data with 3rd class (Pre-FOG)**
Before the occurence of every FOG event, the previous w*f_s timesteps are labelled as a third class 'preFOG' which can be trained in order to predict FOG before it's onset.


In [16]:
def label_prefog(dataset,window_length = 1):
    dataset.drop(index = list(dataset[dataset['Action'] == 0].index),inplace=True) #remove data when person is walking
    window_length = 64*window_length #window size for that second(s)
    #print(dataset.Action.unique())
    fog_index=[]
    for i in dataset.index: 
        if dataset.loc[i,'Action'] == 2:
            fog_index.append(i) #add all the instances when fog is occured in that window-size
#for indices are the timestamps of the person experiencing fog in that window

    start_indices=[]
    for i in fog_index:
        if (dataset.loc[i-1,'Action']!=dataset.loc[i,'Action']): 
            # store all the instances of the timestamp before the fog (from label ==1 to label ==2)
            start_indices.append(i) #this is the timestamps when the person is starting to face fog (about to)

    prefog=[]
    for start in start_indices: #for each start instance, store the instances of the previous window
        prefog_start = [x for x in range(start-window_length,start)]
        prefog.append(prefog_start) 
        
# prefog is for window size=64 * w
    prefog = [item for sublist in prefog for item in sublist]
# now prefog is for w with each hertz of frequency w/64

    for i in prefog:
        dataset.loc[i,'Action'] = 3 #mark a new label =3 for each of those instances
    #print(dataset.Action.unique())  # 1, 3, 2 where 3 is prefog
    dataset['Action'] = dataset['Action'] - 1
    #print(dataset.Action.unique())  # 0, 2, 1 where 2 is prefog

    return dataset

In [17]:
data_path = "/Users/vanshika/Downloads/dataset_fog_release/dataset_fog_release/dataset"
#" Enter the folder path of Unzipped UCI Daphnet Dataset "

people = []
dataset=pd.DataFrame()
for person in os.listdir(data_path):
    if '.txt' in person: 
        people.append(person)
        
for window_length in range(1,5):
    for person in people: 
        name = person.split('R')[0]
        print (name)
        file = data_path+"/"+person
        temp = pd.read_csv(file,delimiter= " ", header = None)
        print (person,' is read',end = '\t')

        if 2 in temp[max(temp.columns)].unique(): #that patient/ person has undergone FoG == 2
            print ('Adding {} to dataset'.format(person),end = '\t')            
            temp.columns = ['time','A_F','A_V','A_L','L_F','L_V','L_L','T_F','T_V','T_L','Action']   
        #    print(temp.Action.unique())
            temp = label_prefog(temp,window_length).reset_index(drop=True)

            temp['name'] = name
            
            print ('{} is labelled'.format(person))
            dataset = pd.concat([dataset,temp],axis = 0)

        print ('')
    dataset.reset_index(drop =True,inplace=True) 
    dataset.to_csv("/Users/vanshika/Downloads/dataset_fog_release/dataset_fog_release/dataset/data.csv",index = None)


display(dataset.shape)


S01
S01R01.txt  is read	Adding S01R01.txt to dataset	S01R01.txt is labelled

S01
S01R02.txt  is read	Adding S01R02.txt to dataset	S01R02.txt is labelled

S02
S02R01.txt  is read	Adding S02R01.txt to dataset	S02R01.txt is labelled

S02
S02R02.txt  is read	Adding S02R02.txt to dataset	S02R02.txt is labelled

S03
S03R01.txt  is read	Adding S03R01.txt to dataset	S03R01.txt is labelled

S03
S03R02.txt  is read	Adding S03R02.txt to dataset	S03R02.txt is labelled

S03
S03R03.txt  is read	
S04
S04R01.txt  is read	
S05
S05R01.txt  is read	Adding S05R01.txt to dataset	S05R01.txt is labelled

S05
S05R02.txt  is read	Adding S05R02.txt to dataset	S05R02.txt is labelled

S06
S06R01.txt  is read	Adding S06R01.txt to dataset	S06R01.txt is labelled

S06
S06R02.txt  is read	
S07
S07R01.txt  is read	Adding S07R01.txt to dataset	S07R01.txt is labelled

S07
S07R02.txt  is read	Adding S07R02.txt to dataset	S07R02.txt is labelled

S08
S08R01.txt  is read	Adding S08R01.txt to dataset	S08R01.txt is labelled

S

(3298672, 12)

In [19]:
display(dataset.head())
(dataset.name).unique(), dataset.Action.unique() #person S04 and S10 do not undergo FoG

Unnamed: 0,time,A_F,A_V,A_L,L_F,L_V,L_L,T_F,T_V,T_L,Action,name
0,750000,-30,990,326,-45,972,181,-38,1000,29,0,S01
1,750015,-30,1000,356,-18,981,212,-48,1028,29,0,S01
2,750031,-20,990,336,18,981,222,-38,1038,9,0,S01
3,750046,-20,1000,316,36,990,222,-19,1038,9,0,S01
4,750062,0,990,316,36,990,212,-29,1038,29,0,S01


(array(['S01', 'S02', 'S03', 'S05', 'S06', 'S07', 'S08', 'S09'],
       dtype=object),
 array([0, 2, 1], dtype=int64))

# **To Extract Non-Overlapping windows of length w *f_s  from the continously logged accelerometer data from the dataset.**

In [20]:
def create_window(act,window_length,dataframe):
    indices = list(dataframe[dataframe.Action == act].index)
    groups = []
    temp = []
    group_count = 0
    for i in range(len(indices)):
        if i == len(indices)-1:
            temp.append(indices[i])
            groups.append(temp)
            temp = []
            break
        temp.append(indices[i])
        if indices[i]+1 != indices[i+1]: 
            group_count+=1
            groups.append(temp)
            temp = []

    fs = 64
    window_length = 1
    # window_length = window_length*fs

    final_dataframe = pd.DataFrame()
    for g in groups: 
        required = math.floor(len(g)/(window_length*fs))
        req_index = g[0:(required*fs)]
        final_dataframe = pd.concat([final_dataframe,dataframe.iloc[req_index,:]],axis = 0)

    return final_dataframe

In [21]:
for window_length in range(1,5):
    activities = []
    for act in range(3):
        activities.append(create_window(act,window_length,dataset))
    to_write = pd.concat(activities,axis = 0)
    to_write.to_csv("/Users/vanshika/Downloads/dataset_fog_release/dataset_fog_release/dataset/window.csv",index = False)

# Extracting Features

 The following feature are extracted in the time domain 
 1. Mean
 2. std
 3. var
 4. Mav
 5. rms
 
 The following feature are extracted in the frequency  domain 
 1. Freeze Index
 2. Power
 3. Energy
 4. Entropy
 5. Peak Frequency


In [25]:
#compute time-domain features for each window
window_length = 1
fs = 64
w = window_length*fs
dataframe = pd.read_csv("/Users/vanshika/Downloads/dataset_fog_release/dataset_fog_release/dataset/window.csv")

df = dataframe.drop(columns=['time','Action','name'])
stat = pd.DataFrame()
col= list(df.columns)
#for each sensor column, compute the features
for s in col:    
    print (s)
    mn =[] 
    var = []
    std = []
    mav = []
    rms =[]
    for i in range(0,len(df),w):
        mn_  = np.mean(df[s].iloc[i:i+w])
        var_  = np.var(df[s].iloc[i:i+w])
        std_  = np.std(df[s].iloc[i:i+w])
        mav_  = np.mean(abs(df[s].iloc[i:i+w]))
        rms_  = np.sqrt(np.mean((df[s].iloc[i:i+w])**2))

        mn.append(mn_)
        var.append(var_)
        std.append(std_)
        mav.append(mav_)
        rms.append(rms_)
# make a new column for each feature for each sensor- 9 * 5 = 45 features
    stat['mean_'+s] = mn
    stat['var_'+s] = var
    stat['std_'+s] = std
    stat['rms_'+s] = rms
    stat['mav_'+s] = mav

print(stat.shape)

stat1 = copy.copy(stat)
stat1['w'] = dataframe['Action'].iloc[[x for x in range(0,len(dataframe),w)]].to_list()
order = ['w']
order += stat1.columns.to_list()[:-1]
stat1 = stat1[order]
col = stat1.columns.to_list()
col[0] = 0
stat1.columns = col
feature_name ="/Users/vanshika/Downloads/dataset_fog_release/dataset_fog_release/dataset/time.csv"
stat1.to_csv(feature_name, index = False)

A_F
A_V
A_L
L_F
L_V
L_L
T_F
T_V
T_L
(50675, 45)


In [23]:
stat1.shape # adding labels inplace

(50675, 46)

In [24]:
# While including frequency domain features in our model
# yields better recall score, it is observed in some research papers
# that the latency due to these features compared to itâ€™s
# time domain counterpart is very high. This is attributed to the
# simplicity of time domain features which are based on simple
# mathematical and statistical operations whereas the frequency
# domain needs performance of Fast Fourier Transform (FFT)

# #frquency domain features
# from scipy.signal import butter, lfilter

# window_length = 1
# fs = 64
# w = window_length*fs
# dataframe = pd.read_csv("/Users/vanshika/Downloads/dataset_fog_release/dataset_fog_release/dataset/window.csv")

# df = dataframe.drop(columns=['time','Action','name'])
# col= list(df.columns)
# order=5
# fi=pd.DataFrame()
# power = pd.DataFrame()

# bands = {'locomotor' :(0.5,3),'freeze' :(3,8)} #data from parkinson-gait paper

# for s in col:
#     xtemp = []
#     xtemp1 = []
#     for i in range(0,len(df),w):
#         nyq=0.5*fs
        
#         #locomotor band 0.5-3hz
#         loc_low= 0.5/nyq
#         loc_high=3/nyq
        
#         #clipping off band from the window
#         b, a = butter(order, [loc_low, loc_high], btype='band')
#         y=lfilter(b,a,df[s].iloc[i:i+w])
        
#         #total power in locomotor band
#         e1=sum([x**2 for x in y])

#         #freeze band 3-8hz
#         frez_low= 3/nyq
#         frez_high=8/nyq

#         #clipping off band from the window
#         b1, a1 = butter(order, [frez_low, frez_high], btype='band')
#         y1=lfilter(b1,a1,df[s].iloc[i:i+w])
#         #total power in locomotor band
#         e2=sum([x**2 for x in y1])
        
#         FI=e2/e1
#         POW=e2+e1
#         xtemp.append(FI)
#         xtemp1.append(POW)
#     fi['FI'+s] = xtemp
#     power['P'+s] = xtemp1
# print ("Freeze and power done")


# w = window_length*fs
# E=[]
# for i in range(0,len(df),w):
#     energy = np.sum((df.iloc[i:i+w,:])**2)
#     E.append(energy)
# E = pd.DataFrame(E)
# E.columns = ["EN_" + x for x in df.columns]

# #Entropy 
# from scipy.signal import periodogram

# peak_f = pd.DataFrame()
# PSE = pd.DataFrame()
# for s in col:
#     peakF = []
#     pse = []
#     for i in range(0,len(df),w):
#         f,Pxx_den = periodogram(df[s].iloc[i:i+w],fs)
#         p_norm = Pxx_den/sum(Pxx_den)
#         p_norm = list(filter(lambda a: a != 0, p_norm))
#         pse.append(-(np.sum(p_norm*np.log(p_norm))))
#         peak = (fs/w)*max(Pxx_den)
#         peakF.append(peak)
#     PSE['ENt_'+s] = pse
#     peak_f['peak_'+s] = peakF
# PSE.fillna(0,inplace = True)


# freq = pd.concat([fi,power,E,PSE,peak_f],axis = 1)

# feature_name = "/Users/vanshika/Downloads/dataset_fog_release/dataset_fog_release/dataset/frequency.csv"
# freq.to_csv(feature_name, index = False)
  