In [1]:
# Define the project directory inorder to complie with the reproduceability of the code
project_path = """/home/dhamaks/GitHub/Daphnet-Freezing-of-Gait-Forecasting/"""

Each file comprises the data in a matrix format, with one line per sample, and one column per channel. The channels are as follows:
 - Time of sample in millisecond
 - Ankle (shank) acceleration - horizontal forward acceleration [mg]
 - Ankle (shank) acceleration - vertical [mg]
 - Ankle (shank) acceleration - horizontal lateral [mg]
 - Upper leg (thigh) acceleration - horizontal forward acceleration [mg]
 - Upper leg (thigh) acceleration - vertical [mg]
 - Upper leg (thigh) acceleration - horizontal lateral [mg]
 - Trunk acceleration - horizontal forward acceleration [mg]
 - Trunk acceleration - vertical [mg]
 - Trunk acceleration - horizontal lateral [mg]
 - Annotation [0, 1, or 2]

The meaning of the annotations are as follows:
 - 0: not part of the experiment. For instance the sensors are installed on the user or the user is performing activities unrelated to the experimental protocol, such as debriefing
 - 1: experiment, no freeze (can be any of stand, walk, turn)
 - 2: freeze

In [2]:
# Import all the required libraries
import warnings
warnings.filterwarnings('ignore')

import os
import pandas as pd

In [3]:
# Get all the file's names from our datasets folder
list_of_files = [item for item in os.listdir(project_path + "/Daphnet-Freezing-of-Gait-Dataset/dataset/") if "Identifier" not in item]

In [4]:
# Coustomly define the column names based on the information available in the readme of the dataset
col_names = """Time in millisecond
Ankle_acc - horizontal forward
Ankle_acc - vertical
Ankle_acc - horizontal lateral
Upper_leg_acc - horizontal forward
Upper_leg_acc - vertical 
Upper_leg_acc - horizontal lateral
Trunk_acc - horizontal forward
Trunk_acc - vertical
Trunk_acc - horizontal lateral
Annotation""".split(sep = "\n")

In [5]:
# Create a dummy variable for consolidation of the data from all the files
final_dataset = pd.DataFrame(columns=col_names)

In [6]:
# Traverse file-by-file and append all the information in the initial dummy variable
for i in range(len(list_of_files)):
    temp = pd.read_csv(project_path + "/Daphnet-Freezing-of-Gait-Dataset/dataset/" + list_of_files[i], encoding="UTF-8", header=None, sep=" ")
    temp.columns = col_names
    (temp['File S'], temp['File R']) = tuple([int(item) for item in list_of_files[i].split(sep=".")[0][1:].split(sep="R")])
    temp['File'] = list_of_files[i].split(sep=".")[0]
    final_dataset = pd.concat([final_dataset, temp], axis=0)

In [7]:
final_dataset

Unnamed: 0,Time in millisecond,Ankle_acc - horizontal forward,Ankle_acc - vertical,Ankle_acc - horizontal lateral,Upper_leg_acc - horizontal forward,Upper_leg_acc - vertical,Upper_leg_acc - horizontal lateral,Trunk_acc - horizontal forward,Trunk_acc - vertical,Trunk_acc - horizontal lateral,Annotation,File S,File R,File
0,15,0,0,0,127,37,-969,0,0,0,0,8.0,1.0,S08R01
1,31,0,0,0,127,37,-969,0,0,0,0,8.0,1.0,S08R01
2,46,70,39,-970,100,55,-969,0,0,0,0,8.0,1.0,S08R01
3,62,70,39,-970,109,46,-969,0,0,0,0,8.0,1.0,S08R01
4,78,70,39,-980,109,46,-969,0,0,0,0,8.0,1.0,S08R01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195732,3058328,232,156,-930,72,27,-979,398,-266,-854,0,4.0,1.0,S04R01
195733,3058343,242,166,-940,63,37,-969,398,-266,-854,0,4.0,1.0,S04R01
195734,3058359,212,166,-920,63,37,-979,388,-266,-825,0,4.0,1.0,S04R01
195735,3058375,232,166,-920,81,37,-989,407,-266,-834,0,4.0,1.0,S04R01


In [8]:
# List down all the original file names
final_dataset['File'].unique()

array(['S08R01', 'S09R01', 'S07R01', 'S01R02', 'S05R02', 'S01R01',
       'S03R03', 'S02R02', 'S02R01', 'S06R01', 'S07R02', 'S10R01',
       'S05R01', 'S03R01', 'S06R02', 'S03R02', 'S04R01'], dtype=object)

In [9]:
# Sort the dataframe according to each file's data and then according to the time-stamp
final_dataset.sort_values(["File", "Time in millisecond"], inplace = True)
final_dataset

Unnamed: 0,Time in millisecond,Ankle_acc - horizontal forward,Ankle_acc - vertical,Ankle_acc - horizontal lateral,Upper_leg_acc - horizontal forward,Upper_leg_acc - vertical,Upper_leg_acc - horizontal lateral,Trunk_acc - horizontal forward,Trunk_acc - vertical,Trunk_acc - horizontal lateral,Annotation,File S,File R,File
0,15,70,39,-970,0,0,0,0,0,0,0,1.0,1.0,S01R01
1,31,70,39,-970,0,0,0,0,0,0,0,1.0,1.0,S01R01
2,46,60,49,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
3,62,60,49,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
4,78,50,39,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193298,3020296,-131,107,-960,0,0,0,0,0,0,0,10.0,1.0,S10R01
193299,3020312,-121,127,-970,0,0,0,0,0,0,0,10.0,1.0,S10R01
193300,3020328,-141,117,-960,0,0,0,0,0,0,0,10.0,1.0,S10R01
193301,3020343,-131,127,-980,0,0,0,0,0,0,0,10.0,1.0,S10R01


In [10]:
final_dataset[final_dataset['File']=='S01R01']

Unnamed: 0,Time in millisecond,Ankle_acc - horizontal forward,Ankle_acc - vertical,Ankle_acc - horizontal lateral,Upper_leg_acc - horizontal forward,Upper_leg_acc - vertical,Upper_leg_acc - horizontal lateral,Trunk_acc - horizontal forward,Trunk_acc - vertical,Trunk_acc - horizontal lateral,Annotation,File S,File R,File
0,15,70,39,-970,0,0,0,0,0,0,0,1.0,1.0,S01R01
1,31,70,39,-970,0,0,0,0,0,0,0,1.0,1.0,S01R01
2,46,60,49,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
3,62,60,49,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
4,78,50,39,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151982,2374734,80,39,-960,9,-46,-1000,514,-390,-747,0,1.0,1.0,S01R01
151983,2374750,60,39,-950,-9,-27,-979,514,-390,-718,0,1.0,1.0,S01R01
151984,2374765,60,39,-950,-18,-37,-979,524,-390,-747,0,1.0,1.0,S01R01
151985,2374781,60,29,-950,9,-46,-969,524,-390,-728,0,1.0,1.0,S01R01


In [11]:
final_dataset.drop([0, 3], axis=0)

Unnamed: 0,Time in millisecond,Ankle_acc - horizontal forward,Ankle_acc - vertical,Ankle_acc - horizontal lateral,Upper_leg_acc - horizontal forward,Upper_leg_acc - vertical,Upper_leg_acc - horizontal lateral,Trunk_acc - horizontal forward,Trunk_acc - vertical,Trunk_acc - horizontal lateral,Annotation,File S,File R,File
1,31,70,39,-970,0,0,0,0,0,0,0,1.0,1.0,S01R01
2,46,60,49,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
4,78,50,39,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
5,93,50,39,-960,0,0,0,0,0,0,0,1.0,1.0,S01R01
6,109,60,39,-990,0,0,0,0,0,0,0,1.0,1.0,S01R01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193298,3020296,-131,107,-960,0,0,0,0,0,0,0,10.0,1.0,S10R01
193299,3020312,-121,127,-970,0,0,0,0,0,0,0,10.0,1.0,S10R01
193300,3020328,-141,117,-960,0,0,0,0,0,0,0,10.0,1.0,S10R01
193301,3020343,-131,127,-980,0,0,0,0,0,0,0,10.0,1.0,S10R01


In [12]:
# Check of any row that might contain any null values
remove_indexes = []
for row_index in range(final_dataset.shape[0]):
    if final_dataset.iloc[row_index].isna().sum() > 0:
        remove_indexes.append(row_index)
final_dataset.drop(remove_indexes, axis=0, inplace=True)
final_dataset.reset_index(drop=True, inplace=True)

In [13]:
final_dataset.to_csv(project_path + "/Daphnet-Freezing-of-Gait-Dataset/consolidated_dataset.csv", header=True, index=False, sep="\t")