In [1]:
import numpy as np
import pandas as pd
import tqdm
import collections 
import time
import pickle
import random

In [2]:
# CONSTANT
# Sliding-window length; used as the loop bound when the windowing cells
# walk over train_df / test_df.
WINDOW_SIZE = 40
# Minimum number of rows with outcome == 1 inside a window for the whole
# window to be labelled as a fall.
THRESHOLD = 25

In [3]:
def select_random_sample(dic, number, file_name=None, seed=42):
    """Return a reproducible random subset of ``dic``.

    Parameters
    ----------
    dic : dict
        Mapping to sample entries from.
    number : int
        How many keys to draw, without replacement.
    file_name : str, optional
        When given, the sampled dict is also pickled to
        ``data/<file_name>.pkl``.
    seed : int, optional
        Seed for the ``random`` module. The default (42) reproduces the
        value that used to be hard-coded.

    Returns
    -------
    dict
        ``{key: dic[key]}`` for every sampled key, in sampling order.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when ``number`` is negative or
        larger than ``len(dic)``.
    """
    # NOTE: this re-seeds the module-level RNG on every call, so the same
    # input always yields the same sample (and the global random state is reset).
    random.seed(seed)
    sampled_keys = random.sample(list(dic), number)
    dic_random = {key: dic[key] for key in sampled_keys}
    if file_name is not None:
        with open('data/' + file_name + '.pkl', 'wb') as file:
            pickle.dump(dic_random, file)
    return dic_random

In [4]:
def load_dic(file_name):
    """Unpickle and return the object stored at ``data/<file_name>.pkl``."""
    pickle_path = 'data/' + file_name + '.pkl'
    # NOTE: pickle.load can execute arbitrary code; only point this at
    # files that are trusted.
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)

In [5]:
def generate_dic(ls_X, ls_y):
    """Pair every chunk with its label, keyed by position.

    Each element of ``ls_X`` is turned into a DataFrame and transposed;
    the result is stored together with the label found at the same
    position in ``ls_y``.

    Returns
    -------
    dict
        ``{position: (transposed DataFrame, label)}``.
    """
    return {
        position: (pd.DataFrame(chunk).T, ls_y[position])
        for position, chunk in enumerate(ls_X)
    }

In [6]:
def convert_to_np_array(data, file_name=None):
    """Turn a ``{key: (DataFrame, label)}`` dict into feature/label arrays.

    Parameters
    ----------
    data : dict
        Values are indexable; element 0 must provide ``to_numpy()`` and
        element 1 is the label. Entries are processed in dict order.
    file_name : str, optional
        When given, the arrays are also written to
        ``data/X_<file_name>_array.npy`` and ``data/y_<file_name>_array.npy``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_array, y_array)``; ``X_array`` has one leading entry per
        item of ``data``.
    """
    entries = list(data.values())
    X_array = np.array([entry[0].to_numpy() for entry in entries])
    y_array = np.array([entry[1] for entry in entries])
    if file_name is not None:
        np.save('data/X_' + file_name + '_array.npy', X_array)
        np.save('data/y_' + file_name + '_array.npy', y_array)
    return X_array, y_array

In [7]:
# Raw training / testing tables. The windowing cells below read the
# ' ms_accelerometer_x/y/z' columns (note the leading space in the names)
# and the 'outcome' column from these frames.
train_df = pd.read_csv('data/raw182_Training_Relabeled_Auto_25.csv')
test_df = pd.read_csv('data/raw91_Testing_Relabeled_Auto_25.csv')

### data preprocessing

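- sliding window of 40 consecutive rows (`WINDOW_SIZE`), moved forward one row at a time
- if at least 25 rows (`THRESHOLD`) in a window are labelled as fall, the whole chunk is labelled fall; otherwise it is labelled not fall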

In [8]:
# Slice width is taken from WINDOW_SIZE so it cannot drift away from the
# loop bound below (it used to be a second hard-coded 40).
n = WINDOW_SIZE
X_train_notfall, X_train_fall, y_train_notfall, y_train_fall = [], [], [], []

# NOTE(review): this bound stops one start index early, so the last full
# window (starting at len(train_df) - WINDOW_SIZE) is never produced. Left
# as-is so the sample counts and the seeded down-sampling stay unchanged;
# confirm whether that is intended.
for i in tqdm.tqdm(range(len(train_df) - WINDOW_SIZE)):
    # One chunk = the x, y and z accelerometer slices of this window.
    chunk = [
        train_df[' ms_accelerometer_x'][i : i + n],
        train_df[' ms_accelerometer_y'][i : i + n],
        train_df[' ms_accelerometer_z'][i : i + n],
    ]

    # Number of rows in the window whose outcome equals 1.
    num_fall = collections.Counter(train_df['outcome'][i : i + n])[1]

    if num_fall >= THRESHOLD:
        y_train_fall.append(1)
        X_train_fall.append(chunk)
    else:
        y_train_notfall.append(0)
        X_train_notfall.append(chunk)

100%|██████████████████████████████████| 33980/33980 [00:01<00:00, 20778.47it/s]


In [9]:
# Slice width is taken from WINDOW_SIZE so it cannot drift away from the
# loop bound below (it used to be a second hard-coded 40).
n = WINDOW_SIZE
X_test_notfall, X_test_fall, y_test_notfall, y_test_fall = [], [], [], []

# NOTE(review): this bound stops one start index early, so the last full
# window (starting at len(test_df) - WINDOW_SIZE) is never produced. Left
# as-is so the sample counts and the seeded down-sampling stay unchanged;
# confirm whether that is intended.
for i in tqdm.tqdm(range(len(test_df) - WINDOW_SIZE)):
    # One chunk = the x, y and z accelerometer slices of this window.
    chunk = [
        test_df[' ms_accelerometer_x'][i : i + n],
        test_df[' ms_accelerometer_y'][i : i + n],
        test_df[' ms_accelerometer_z'][i : i + n],
    ]

    # Number of rows in the window whose outcome equals 1.
    num_fall = collections.Counter(test_df['outcome'][i : i + n])[1]

    if num_fall >= THRESHOLD:
        y_test_fall.append(1)
        X_test_fall.append(chunk)
    else:
        y_test_notfall.append(0)
        X_test_notfall.append(chunk)

100%|██████████████████████████████████| 17189/17189 [00:00<00:00, 19552.15it/s]


In [10]:
# Class sizes of the training windows; len_train_fall is reused later as
# the number of not-fall windows to sample.
len_train_fall, len_train_notfall = len(X_train_fall), len(X_train_notfall)
print('number of train data fall', len_train_fall)
print('number of train data not fall', len_train_notfall)

number of train data fall 2912
number of train data not fall 31068


In [11]:
# Class sizes of the testing windows; len_test_fall is reused later as
# the number of not-fall windows to sample.
len_test_fall, len_test_notfall = len(X_test_fall), len(X_test_notfall)
print('number of test data fall', len_test_fall)
print('number of test data not fall', len_test_notfall)

number of test data fall 1456
number of test data not fall 15733


### select a random sample of not-fall chunks (same size as the fall class)

In [12]:
# Index the fall training chunks as {position: (DataFrame, label)}.
dic_train_fall = generate_dic(ls_X=X_train_fall, ls_y=y_train_fall)
len(dic_train_fall)

2912

In [13]:
# dic_train_fall[0]

In [14]:
# Index the not-fall training chunks as {position: (DataFrame, label)}.
dic_train_notfall = generate_dic(ls_X=X_train_notfall, ls_y=y_train_notfall)
len(dic_train_notfall)

31068

In [15]:
# dic_train_notfall[0]

In [16]:
# Index the fall testing chunks as {position: (DataFrame, label)}.
dic_test_fall = generate_dic(ls_X=X_test_fall, ls_y=y_test_fall)
len(dic_test_fall)

1456

In [17]:
# dic_test_fall[0]

In [18]:
# Index the not-fall testing chunks as {position: (DataFrame, label)}.
dic_test_notfall = generate_dic(ls_X=X_test_notfall, ls_y=y_test_notfall)
len(dic_test_notfall)

15733

In [19]:
# dic_test_notfall[0]

In [20]:
# Down-sample the not-fall training chunks to the number of fall chunks.
dic_train_notfall_random = select_random_sample(dic_train_notfall, number=len_train_fall)
len(dic_train_notfall_random)

2912

In [21]:
# dic_train_notfall_random[list(dic_train_notfall_random.keys())[0]]

In [22]:
# Down-sample the not-fall testing chunks to the number of fall chunks.
dic_test_notfall_random = select_random_sample(dic_test_notfall, number=len_test_fall)
len(dic_test_notfall_random)

1456

In [23]:
# dic_test_notfall_random[list(dic_test_notfall_random.keys())[0]]

### create numpy array

In [24]:
# Build the fall training arrays and save them under the "train_fall" name.
X_train_fall_array, y_train_fall_array = convert_to_np_array(
    dic_train_fall, file_name="train_fall"
)

In [25]:
# Build the sampled not-fall training arrays and save them under "train_notfall".
X_train_notfall_array, y_train_notfall_array = convert_to_np_array(
    dic_train_notfall_random, file_name="train_notfall"
)

In [26]:
# Build the fall testing arrays and save them under the "test_fall" name.
X_test_fall_array, y_test_fall_array = convert_to_np_array(
    dic_test_fall, file_name="test_fall"
)

In [27]:
# Build the sampled not-fall testing arrays and save them under "test_notfall".
X_test_notfall_array, y_test_notfall_array = convert_to_np_array(
    dic_test_notfall_random, file_name="test_notfall"
)

### load numpy array

In [28]:
def load_np_array(file_name):
    """Load the saved feature/label pair for ``file_name``.

    Reads ``data/X_<file_name>_array.npy`` first, then
    ``data/y_<file_name>_array.npy``, and returns them as ``(X, y)``.
    """
    features_path = 'data/X_' + file_name + '_array.npy'
    labels_path = 'data/y_' + file_name + '_array.npy'
    return np.load(features_path), np.load(labels_path)

In [29]:
# Reload the saved fall training arrays. This rebinds X_train_fall /
# y_train_fall, which held Python lists in the windowing cell, to ndarrays.
X_train_fall, y_train_fall = load_np_array(file_name="train_fall")

In [30]:
# Sanity check: feature and label array shapes, one per line.
print(X_train_fall.shape, y_train_fall.shape, sep='\n')

(2912, 40, 3)
(2912,)


In [31]:
# Reload the saved (down-sampled) not-fall training arrays. This rebinds
# X_train_notfall / y_train_notfall from the full lists to the sampled ndarrays.
X_train_notfall, y_train_notfall = load_np_array(file_name="train_notfall")

In [32]:
# Sanity check: feature and label array shapes, one per line.
print(X_train_notfall.shape, y_train_notfall.shape, sep='\n')

(2912, 40, 3)
(2912,)


In [33]:
# Reload the saved fall testing arrays. This rebinds X_test_fall /
# y_test_fall, which held Python lists in the windowing cell, to ndarrays.
X_test_fall, y_test_fall = load_np_array(file_name="test_fall")

In [34]:
# Sanity check: feature and label array shapes, one per line.
print(X_test_fall.shape, y_test_fall.shape, sep='\n')

(1456, 40, 3)
(1456,)


In [35]:
# Reload the saved (down-sampled) not-fall testing arrays.
# Fixed: this previously loaded "test_fall", so the not-fall variables
# silently held the fall data (both sets have the same shape, which hid it).
X_test_notfall, y_test_notfall = load_np_array("test_notfall")

In [36]:
# Sanity check: feature and label array shapes, one per line.
print(X_test_notfall.shape, y_test_notfall.shape, sep='\n')

(1456, 40, 3)
(1456,)
