In [None]:
from google.colab import drive
import pandas as pd
from io import StringIO
from math import sqrt
import numpy as np
import scipy.stats as st
# from joblib import load
import pickle

drive.mount('/content/drive')

Mounted at /content/drive


Đọc file tín hiệu thu được từ cảm biến

In [None]:
def read_file(file_path):
    """Load one semicolon-separated sensor recording into a DataFrame.

    The first line of the file is skipped and the remaining columns are named
    ``time, acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z``. ``time`` is rebased
    so that the first sample is 0, and two magnitude columns are appended:
    ``acc_re`` and ``gyro_re``, the Euclidean norm of the three accelerometer
    and gyroscope axes respectively.

    Parameters
    ----------
    file_path : str or path-like
        Path of the recording to read.

    Returns
    -------
    pandas.DataFrame or None
        The parsed recording. ``None`` is returned (after printing a message)
        only if the file cannot be decoded.

    Raises
    ------
    ValueError
        If the file contains no data rows after the skipped first line.
    """
    new_headers = ["time", "acc_x", "acc_y", "acc_z", "gyro_x", "gyro_y", "gyro_z"]

    # Only the read can raise a decoding error, so the try block is kept narrow;
    # parsing problems and bad data surface as their own exceptions.
    try:
        with open(file_path, 'r', encoding='latin-1') as file:
            # Parse straight from the handle instead of buffering the whole
            # file in a string and wrapping it in StringIO.
            df = pd.read_csv(file, header=None, sep=';', names=new_headers, engine='python', skiprows=1)
    except UnicodeDecodeError as e:
        # latin-1 maps every byte value, so this branch is not expected to
        # fire; it is kept (with an explicit None) for backward compatibility.
        print(f"Error decoding file {file_path}: {e}")
        return None

    if df.empty:
        # Without this guard the time rebasing below fails with an opaque
        # KeyError/IndexError that does not name the offending file.
        raise ValueError(f"No sensor samples found in {file_path}")

    # Positional access: "first sample" must not depend on the index labels.
    df['time'] = df['time'] - df['time'].iloc[0]

    df['acc_re'] = np.sqrt(df['acc_x']**2 + df['acc_y']**2 + df['acc_z']**2)
    df['gyro_re'] = np.sqrt(df['gyro_x']**2 + df['gyro_y']**2 + df['gyro_z']**2)

    return df

Hàm trích xuất đặc trưng thống kê trên miền thời gian

In [None]:
def stat_features_for_raw_signal(ds):
  """Summarise a raw signal with five time-domain statistics.

  Parameters
  ----------
  ds : pandas.Series or numpy.ndarray
      Samples of a single signal.

  Returns
  -------
  numpy.ndarray
      1-D array ``[max, std, var, iqr, mad]``. ``std`` and ``var`` come from
      ``np.std`` / ``np.var`` with their default ``ddof=0``; ``iqr`` is
      ``scipy.stats.iqr``; ``mad`` is the median absolute deviation from the
      median.
  """
  center = np.median(ds)
  # np.median orders the data itself, so pre-sorting the deviations is
  # unnecessary work and does not change the result.
  mad_ft = np.median(abs(ds - center))

  # Assemble the scalars directly rather than reshaping each one to (1, 1),
  # concatenating and flattening; this also tolerates plain Python floats.
  return np.array([np.amax(ds), np.std(ds), np.var(ds), st.iqr(ds), mad_ft])

def stat_features_for_jerk_signal(ds):
  """Summarise a jerk (first-difference) signal with six statistics.

  Parameters
  ----------
  ds : pandas.Series or numpy.ndarray
      Samples of a single differenced signal.

  Returns
  -------
  numpy.ndarray
      1-D array ``[max, min, range, std, var, mad]``. ``range`` is
      ``np.ptp`` (max minus min); ``std`` and ``var`` come from ``np.std`` /
      ``np.var`` with their default ``ddof=0``; ``mad`` is the median absolute
      deviation from the median.
  """
  center = np.median(ds)
  # np.median orders the data itself, so pre-sorting the deviations is
  # unnecessary work and does not change the result.
  mad_ft = np.median(abs(ds - center))

  # Assemble the scalars directly rather than reshaping each one to (1, 1),
  # concatenating and flattening; this also tolerates plain Python floats.
  return np.array([np.amax(ds), np.amin(ds), np.ptp(ds), np.std(ds), np.var(ds), mad_ft])

def header_stat_for_raw_signals(feature, type):
  """Return the column names for the raw-signal statistics of one feature.

  Each of ``max, std, var, iqr, mad`` (in that order) is suffixed with
  ``_<feature>_<type>``, e.g. ``max_acc_re_raw``.
  """
  suffix = '_'.join(('', feature, type))
  return [stat_name + suffix for stat_name in ('max', 'std', 'var', 'iqr', 'mad')]

def header_stat_for_jerk_signals(feature, type):
  """Return the column names for the jerk-signal statistics of one feature.

  Each of ``max, min, range, std, var, mad`` (in that order) is suffixed with
  ``_<feature>_<type>``, e.g. ``range_gyro_re_jerk``.
  """
  suffix = '_'.join(('', feature, type))
  return [stat_name + suffix for stat_name in ('max', 'min', 'range', 'std', 'var', 'mad')]

def make_feature_vector(data):
  """Build the flat feature vector for one recording.

  For ``acc_re`` and then ``gyro_re``, the raw-signal statistics are followed
  by the statistics of the first difference of that column (with the leading
  NaN dropped). The four parts are joined into a single 1-D array.
  """
  parts = []
  for column in ('acc_re', 'gyro_re'):
    signal = data[column]
    parts.append(stat_features_for_raw_signal(signal))
    parts.append(stat_features_for_jerk_signal(signal.diff().dropna()))

  return np.concatenate(parts, axis=0)

# Column names for one feature vector, listed in the same order in which
# make_feature_vector joins its parts: raw then jerk statistics, first for
# acc_re and then for gyro_re.
headers = np.concatenate([
    build_headers(signal_name, signal_kind)
    for signal_name in ('acc_re', 'gyro_re')
    for build_headers, signal_kind in (
        (header_stat_for_raw_signals, 'raw'),
        (header_stat_for_jerk_signals, 'jerk'),
    )
], axis=0)

Load model từ file joblib

In [None]:
# NOTE(review): `load` is not imported anywhere in the visible notebook -- the
# `from joblib import load` line in the first cell is commented out -- so this
# cell raises NameError on a fresh kernel. The pickle cell that follows also
# assigns `model`; either re-enable the import or drop this cell.
model = load('/content/drive/MyDrive/Dataset/best_model.joblib')

Load model từ file pkl

In [None]:
# Unpickle the saved model from the mounted Drive folder and bind it to `model`.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so only load pickles that come from a trusted source.
path = '/content/drive/MyDrive/Dataset/best_model.pkl'
with open(path, 'rb') as file:
    model = pickle.load(file)

Hàm dự đoán té ngã (params gồm model và filepath, output là 1 nếu ngã, -1 nếu ADL)

In [None]:
# Echo the loaded object so the notebook renders its repr.
model

In [None]:
def predict(model, filepath):
  """Predict the label for the recording stored at ``filepath``.

  The file is parsed with ``read_file``, turned into a feature vector, wrapped
  in a one-row DataFrame whose columns are the module-level ``headers``, and
  passed to ``model.predict``; the first (only) prediction is returned. Per
  the notebook's description, 1 means a fall and -1 means ADL.
  """
  feature_row = make_feature_vector(read_file(filepath))
  single_sample = pd.DataFrame([feature_row], columns=headers)
  return model.predict(single_sample)[0]

Test hàm dự báo

In [None]:
# Smoke-test predict() on a single recording.
# NOTE(review): the saved output of this cell also shows a DataFrame dump,
# which the current read_file/predict code does not appear to print -- the
# output looks stale; re-run the cell to refresh it.
predict(model, '/content/drive/MyDrive/Dataset/SplittedLongData/P03_01_0.txt')

    time   acc_x    acc_y   acc_z  gyro_x  gyro_y  gyro_z     acc_re   gyro_re
0      0  6.1962 -15.9597  0.7853 -0.0176  0.0338 -0.0305  17.138308  0.048810
1      8  6.0190 -16.0747  0.7853 -0.0233  0.0422 -0.0294  17.182579  0.056463
2     15  6.0406 -16.0244  0.8428 -0.0238  0.0366 -0.0336  17.145861  0.055090
3     23  5.8921 -16.1656  0.8811 -0.0300  0.0436 -0.0326  17.228459  0.062159
4     32  5.8466 -16.1944  0.7901 -0.0199  0.0308 -0.0372  17.235591  0.052235
..   ...     ...      ...     ...     ...     ...     ...        ...       ...
77   900  5.9640 -16.1273  0.7111  0.0119 -0.0452 -0.0087  17.209438  0.047543
78   908  5.9831 -16.2064  0.8332  0.0109 -0.0464 -0.0145  17.295638  0.049820
79   920  5.9233 -16.1680  0.7925  0.0117 -0.0489 -0.0103  17.237104  0.051324
80   928  6.0190 -16.1273  0.7733  0.0077 -0.0490 -0.0087  17.231255  0.050359
81   996  6.1483 -15.9645  0.8308 -0.0061 -0.0544 -0.0234  17.127670  0.059533

[82 rows x 9 columns]


-1.0

In [None]:
import time

In [None]:
def predict_without_reading(model, data):
    """Predict the label for an already-parsed recording.

    Same steps as ``predict`` minus the file read: ``data`` is turned into a
    feature vector, wrapped in a one-row DataFrame labelled with the
    module-level ``headers``, and the first element of ``model.predict`` is
    returned.
    """
    feature_row = make_feature_vector(data)
    single_sample = pd.DataFrame([feature_row], columns=headers)
    return model.predict(single_sample)[0]

# Read the file once, outside the timed region, and keep the parsed data.
data = read_file('/content/drive/MyDrive/Dataset/SplittedLongData/P03_01_0.txt')

# Start timing. perf_counter() is a monotonic, high-resolution clock meant for
# measuring short intervals; time.time() is the wall clock and can be adjusted
# while the measurement is running.
start_time = time.perf_counter()

# Run the prediction without counting the file-read time.
prediction = predict_without_reading(model, data)

# Stop timing.
end_time = time.perf_counter()

# Compute and print the elapsed time in seconds.
execution_time = end_time - start_time
print(f"Thời gian thực thi (không tính read_file): {execution_time} giây")

Thời gian thực thi (không tính read_file): 0.03560972213745117 giây


In [None]:
# NOTE(review): this repeats the predict_without_reading definition from the
# previous timing cell; one of the two can be deleted.
def predict_without_reading(model, data):
    """Predict the label for an already-parsed recording.

    ``data`` is turned into a feature vector, wrapped in a one-row DataFrame
    labelled with the module-level ``headers``, and the first element of
    ``model.predict`` is returned.
    """
    feature_row = make_feature_vector(data)
    single_sample = pd.DataFrame([feature_row], columns=headers)
    return model.predict(single_sample)[0]

# Recordings P03_01_0.txt ... P03_01_66.txt used for the timing run.
file_paths = [f'/content/drive/MyDrive/Dataset/SplittedLongData/P03_01_{i}.txt' for i in range(67)]

execution_times = []

for file_path in file_paths:
    # File reading stays outside the timed region.
    data = read_file(file_path)

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short intervals; time.time() is the wall clock and can be adjusted.
    start_time = time.perf_counter()
    prediction = predict_without_reading(model, data)
    end_time = time.perf_counter()
    execution_time = end_time - start_time

    execution_times.append(execution_time)

    print(f"Thời gian thực thi cho file {file_path}: {execution_time} giây")

# Derive every aggregate from the recorded samples instead of maintaining a
# separate running total next to the list.
total_execution_time = sum(execution_times)
average_execution_time = total_execution_time / len(file_paths)

min_execution_time = min(execution_times)
max_execution_time = max(execution_times)

print(f"Thời gian thực thi trung bình: {average_execution_time} giây")
print(f"Thời gian thực thi ngắn nhất: {min_execution_time} giây")
print(f"Thời gian thực thi dài nhất: {max_execution_time} giây")

Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03_01_0.txt: 0.051276206970214844 giây
Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03_01_1.txt: 0.04681277275085449 giây
Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03_01_2.txt: 0.04342341423034668 giây
Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03_01_3.txt: 0.0478215217590332 giây
Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03_01_4.txt: 0.05179476737976074 giây
Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03_01_5.txt: 0.04370903968811035 giây
Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03_01_6.txt: 0.06271767616271973 giây
Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03_01_7.txt: 0.050917625427246094 giây
Thời gian thực thi cho file /content/drive/MyDrive/Dataset/SplittedLongData/P03