In [1]:
import matplotlib.pyplot as plt
from google.colab import drive
import pandas as pd
from io import StringIO
from math import sqrt
import numpy as np
import scipy.stats as st
from scipy.fftpack import fft, fftfreq
from scipy.signal import argrelextrema
import operator
import os
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from sklearn import preprocessing
from sklearn import svm
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import LinearSVC, SVC
from time import time
import seaborn as sns
from sklearn.model_selection import StratifiedShuffleSplit


drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
def euclidean_norm(x, y, z):
  return sqrt(x**2 + y**2 + z**2)

def find_max_norm_index(df):
    max_norm_index = df.apply(lambda row: euclidean_norm(abs(row['acc_x']) + row['gyro_x'], abs(row['acc_y']) + row['gyro_y'], abs(row['acc_z']) + row['gyro_z']), axis=1).idxmax()
    time = df.loc[max_norm_index, 'time']
    return max_norm_index, time

def selected_signals_range(df, reference_time):
  return df[abs(df.index - reference_time) <= 500]


In [3]:
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='latin-1') as file:
            file_content = file.read()

        # Define the new column names
        new_headers = ["time", "acc_x", "acc_y", "acc_z", "gyro_x", "gyro_y", "gyro_z"]

        df = pd.read_csv(StringIO(file_content), header=None, sep=';', names=new_headers, engine='python', skiprows=1)
        df['time'] = df['time'] - df.at[0, 'time']

        index, time = find_max_norm_index(df)
        return df, index, time

    except UnicodeDecodeError as e:
        print(f"Error decoding file {file_path}: {e}")

In [4]:
# Parameter: data series
def stat_features(ds):
  if ds.empty:
    raise ValueError("Input series is empty.")
  ds = np.array(ds).flatten()
  mean_ft = np.mean(ds).reshape(-1,1)           # mean
  min_ft = np.amin(ds).reshape(-1,1)            # min
  max_ft = np.amax(ds).reshape(-1,1)            # max
  range_ft = np.ptp(ds).reshape(-1,1)           # range (peak-to-peak)
  std_ft = np.std(ds).reshape(-1,1)             # standard deviation
  var_ft = np.var(ds).reshape(-1,1)             # variance
  skew_ft = st.skew(ds).reshape(-1,1)           # skewness
  kurtosis_ft = st.kurtosis(ds).reshape(-1,1)   # kurtosis
  iqr_ft = st.iqr(ds).reshape(-1,1)             # interquartile rante
  mad_ft = np.median(np.sort(abs(ds - np.median(ds)))).reshape(-1,1) # median absolute deviation

  return np.concatenate((mean_ft, min_ft, max_ft, range_ft, std_ft, var_ft, skew_ft, kurtosis_ft, iqr_ft, mad_ft), axis=1).flatten()


In [5]:
def make_feature_vector(data, Te=1.0):
  if data.empty:
    raise ValueError("Input series is empty.")
  acc_re = np.sqrt(data['acc_x']**2 + data['acc_y']**2 + data['acc_z']**2)
  gyro_re = np.sqrt(data['gyro_x']**2 + data['gyro_y']**2 + data['gyro_z']**2)

  # Raw signals
  features_acc_x = stat_features(data['acc_x'])
  features_acc_y = stat_features(data['acc_y'])
  features_acc_z = stat_features(data['acc_z'])
  features_acc_re = stat_features(acc_re)
  features_gyro_x = stat_features(data['gyro_x'])
  features_gyro_y = stat_features(data['gyro_y'])
  features_gyro_z = stat_features(data['gyro_z'])
  features_gyro_re = stat_features(gyro_re)

  # Jerk signals
  features_acc_x_jerk = stat_features(data['acc_x'].diff().dropna())
  features_acc_y_jerk = stat_features(data['acc_y'].diff().dropna())
  features_acc_z_jerk = stat_features(data['acc_z'].diff().dropna())
  features_acc_re_jerk = stat_features(acc_re.diff().dropna())
  features_gyro_x_jerk = stat_features(data['gyro_x'].diff().dropna())
  features_gyro_y_jerk = stat_features(data['gyro_y'].diff().dropna())
  features_gyro_z_jerk = stat_features(data['gyro_z'].diff().dropna())
  features_gyro_re_jerk = stat_features(gyro_re.diff().dropna())

  return np.concatenate((features_acc_x, features_acc_y, features_acc_z, features_acc_re,
                         features_gyro_x, features_gyro_y, features_gyro_z, features_gyro_re,
                         features_acc_x_jerk, features_acc_y_jerk, features_acc_z_jerk, features_acc_re_jerk,
                         features_gyro_x_jerk, features_gyro_y_jerk, features_gyro_z_jerk, features_gyro_re_jerk), axis=0)

In [6]:
def read_data(folder_path):
  feature_vectors = []
  label = 1 if folder_path.endswith('Falling') else -1
  # Duyệt qua tất cả các thư mục và file trong thư mục gốc
  for root, dirs, files in os.walk(folder_path):
      for file in files:
        try:
            if file.endswith('.txt'):
                file_path = os.path.join(root, file)
                df, index, time = read_file(file_path)
                df.set_index('time', inplace=True)
                feature_vector = make_feature_vector(selected_signals_range(df, time))
                feature_vectors.append(np.append(feature_vector, label))
                print(f"Processed file: {file_path}")
        except ValueError as error:
            print(f"Error processing file {file_path}: {error}")

  feature_matrix = np.vstack(feature_vectors)
  return feature_matrix

In [7]:
fvs_falling = read_data('/content/drive/MyDrive/FallDetectionAI/TestDataset/Falling')
fvs_adl = read_data('/content/drive/MyDrive/FallDetectionAI/TestDataset/ADL')
fvs = np.concatenate((fvs_falling, fvs_adl), axis=0)

Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_01.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_02.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_03.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_04.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_05.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_06.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_07.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_08.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_09.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F06/P13_10.txt
Processed file: /content/drive/MyDrive/FallDetectionAI/TestDataset/Falling/F01/P13_01.txt
Processed 

In [8]:
def header_stat(feature, type):
  end = '_' + feature + '_' + type
  base_headers = ['mean', 'min', 'max', 'range', 'std', 'var', 'skew', 'kurtosis', 'iqr', 'mad']
  headers = [header + end for header in base_headers]
  return headers

headers = np.concatenate((header_stat('acc_x', 'raw'), header_stat('acc_y', 'raw'), header_stat('acc_z', 'raw'), header_stat('acc_re', 'raw'),
                          header_stat('gyro_x', 'raw'), header_stat('gyro_y', 'raw'), header_stat('gyro_z', 'raw'), header_stat('gyro_re', 'raw'),
                          header_stat('acc_x', 'jerk'), header_stat('acc_y', 'jerk'), header_stat('acc_z', 'jerk'), header_stat('acc_re', 'jerk'),
                          header_stat('gyro_x', 'jerk'), header_stat('gyro_y', 'jerk'), header_stat('gyro_z', 'jerk'), header_stat('gyro_re', 'jerk'),
                          ['fall_adl_bin']), axis=0)

print(len(headers))

161


In [9]:
df = pd.DataFrame(fvs, columns=headers)

In [10]:
df.head()

Unnamed: 0,mean_acc_x_raw,min_acc_x_raw,max_acc_x_raw,range_acc_x_raw,std_acc_x_raw,var_acc_x_raw,skew_acc_x_raw,kurtosis_acc_x_raw,iqr_acc_x_raw,mad_acc_x_raw,...,min_gyro_re_jerk,max_gyro_re_jerk,range_gyro_re_jerk,std_gyro_re_jerk,var_gyro_re_jerk,skew_gyro_re_jerk,kurtosis_gyro_re_jerk,iqr_gyro_re_jerk,mad_gyro_re_jerk,fall_adl_bin
0,4.809949,-37.2394,29.8461,67.0855,8.426082,70.998858,-1.849704,9.899695,2.904175,1.50475,...,-2.659478,3.532227,6.191706,0.767666,0.589311,0.294348,6.32525,0.415586,0.246427,1.0
1,0.446379,-78.4532,26.3147,104.7679,13.283288,176.445742,-3.988632,20.970029,10.21725,4.1371,...,-2.49436,1.385561,3.879921,0.540023,0.291625,-1.457188,5.536205,0.391848,0.188081,1.0
2,11.579571,-14.6669,78.4508,93.1177,11.924689,142.1982,1.816215,9.263484,11.47065,4.7932,...,-3.712825,2.636321,6.349147,0.815085,0.664364,-0.605556,5.306591,0.48663,0.247422,1.0
3,2.233337,-78.4532,32.1948,110.648,11.777095,138.699975,-2.713029,19.796794,7.90325,3.2178,...,-3.181417,3.26416,6.445577,0.65797,0.432925,-0.276742,11.707647,0.426204,0.201116,1.0
4,7.328037,-58.2677,78.4508,136.7185,15.47983,239.625134,-0.41826,8.952035,5.8586,2.5978,...,-2.062997,2.178181,4.241178,0.674245,0.454606,0.460089,2.975093,0.397095,0.211749,1.0


In [11]:
df.to_csv('/content/drive/MyDrive/FallDetectionAI/TestDataset/dataset_raw.csv', index=False)

In [13]:
new_df = df[['max_acc_re_raw', 'std_acc_re_raw', 'var_acc_re_raw', 'iqr_acc_re_raw', 'mad_acc_re_raw', 'max_acc_re_jerk', 'min_acc_re_jerk', 'range_acc_re_jerk', 'std_acc_re_jerk', 'var_acc_re_jerk', 'mad_acc_re_jerk', 'max_gyro_re_raw', 'std_gyro_re_raw', 'var_gyro_re_raw', 'iqr_gyro_re_raw', 'mad_gyro_re_raw', 'max_gyro_re_jerk', 'min_gyro_re_jerk', 'range_gyro_re_jerk', 'std_gyro_re_jerk', 'var_gyro_re_jerk', 'mad_gyro_re_jerk', 'fall_adl_bin']]
new_df.to_csv('/content/drive/MyDrive/FallDetectionAI/TestDataset/dataset_extract.csv')