In [76]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
from scipy.stats import kurtosis, skew
from scipy.signal import find_peaks
from scipy.fft import fft

In [56]:
output_df = pd.read_csv("output.csv")
output_df['act'] = output_df['act'].map({'Others': 0, 'Normal': 1, 'Turning': 2,'Crouching': 3, 'Falling': 4})
output_df['act'].isnull().sum()

0

In [57]:
acc_columns = ['f_acc_x', 'f_acc_y', 'f_acc_z', 'w_acc_x', 'w_acc_y', 'w_acc_z']
gyr_columns = ['f_gyr_x', 'f_gyr_y', 'f_gyr_z', 'w_gyr_x', 'w_gyr_y', 'w_gyr_z']
loc_columns = ['f_loc_h', 'f_loc_v', 'f_loc_d', 'w_loc_h', 'w_loc_v', 'w_loc_d']
mag_columns = ['f_mag_x', 'f_mag_y', 'f_mag_z', 'w_mag_x', 'w_mag_y', 'w_mag_z']
all_columns = acc_columns + gyr_columns + loc_columns + mag_columns

In [58]:
# 定义函数以计算零交叉点
def zero_crossings(arr):
    return ((arr[:-1] * arr[1:]) < 0).sum()

In [59]:
def fft_features(arr):
    fft_coeff = fft(arr)
    return np.abs(fft_coeff[:len(fft_coeff)//2]).mean()  # 只保留前半部分系数的均值

In [60]:
window_size = 50
for col in all_columns:
    output_df[f'{col}_mean'] = output_df[col].rolling(window=window_size, min_periods=1).mean()
    output_df[f'{col}_std'] = output_df[col].rolling(window=window_size, min_periods=1).std()
    output_df[f'{col}_min'] = output_df[col].rolling(window=window_size, min_periods=1).min()
    output_df[f'{col}_max'] = output_df[col].rolling(window=window_size, min_periods=1).max()
    output_df[f'{col}_kurtosis'] = output_df[col].rolling(window=window_size, min_periods=1).apply(kurtosis, raw=True)
    output_df[f'{col}_skew'] = output_df[col].rolling(window=window_size, min_periods=1).apply(skew, raw=True)
    output_df[f'{col}_zero_crossings'] = output_df[col].rolling(window=window_size, min_periods=1).apply(zero_crossings, raw=True)


  output_df[f'{col}_zero_crossings'] = output_df[col].rolling(window=window_size, min_periods=1).apply(zero_crossings, raw=True)
  output_df[f'{col}_mean'] = output_df[col].rolling(window=window_size, min_periods=1).mean()
  output_df[f'{col}_std'] = output_df[col].rolling(window=window_size, min_periods=1).std()
  output_df[f'{col}_min'] = output_df[col].rolling(window=window_size, min_periods=1).min()
  output_df[f'{col}_max'] = output_df[col].rolling(window=window_size, min_periods=1).max()
  output_df[f'{col}_kurtosis'] = output_df[col].rolling(window=window_size, min_periods=1).apply(kurtosis, raw=True)
  output_df[f'{col}_skew'] = output_df[col].rolling(window=window_size, min_periods=1).apply(skew, raw=True)
  output_df[f'{col}_zero_crossings'] = output_df[col].rolling(window=window_size, min_periods=1).apply(zero_crossings, raw=True)
  output_df[f'{col}_mean'] = output_df[col].rolling(window=window_size, min_periods=1).mean()
  output_df[f'{col}_std'] = output_df[col].rolling(w

In [61]:
# 计算加速度和陀螺仪的模并添加到原DataFrame中
output_df['f_acc_norm'] = np.sqrt(output_df['f_acc_x']**2 + output_df['f_acc_y']**2 + output_df['f_acc_z']**2)
output_df['w_acc_norm'] = np.sqrt(output_df['w_acc_x']**2 + output_df['w_acc_y']**2 + output_df['w_acc_z']**2)
output_df['f_gyr_norm'] = np.sqrt(output_df['f_gyr_x']**2 + output_df['f_gyr_y']**2 + output_df['f_gyr_z']**2)
output_df['w_gyr_norm'] = np.sqrt(output_df['w_gyr_x']**2 + output_df['w_gyr_y']**2 + output_df['w_gyr_z']**2)


  output_df['f_acc_norm'] = np.sqrt(output_df['f_acc_x']**2 + output_df['f_acc_y']**2 + output_df['f_acc_z']**2)
  output_df['w_acc_norm'] = np.sqrt(output_df['w_acc_x']**2 + output_df['w_acc_y']**2 + output_df['w_acc_z']**2)
  output_df['f_gyr_norm'] = np.sqrt(output_df['f_gyr_x']**2 + output_df['f_gyr_y']**2 + output_df['f_gyr_z']**2)
  output_df['w_gyr_norm'] = np.sqrt(output_df['w_gyr_x']**2 + output_df['w_gyr_y']**2 + output_df['w_gyr_z']**2)


In [62]:
output_df['hr_mean'] = output_df['hr'].rolling(window=window_size, min_periods=1).mean()
output_df['hr_std'] = output_df['hr'].rolling(window=window_size, min_periods=1).std()
output_df['hr_min'] = output_df['hr'].rolling(window=window_size, min_periods=1).min()
output_df['hr_max'] = output_df['hr'].rolling(window=window_size, min_periods=1).max()
output_df['hr_kurtosis'] = output_df['hr'].rolling(window=window_size, min_periods=1).apply(kurtosis, raw=True)
output_df['hr_skew'] = output_df['hr'].rolling(window=window_size, min_periods=1).apply(skew, raw=True)
output_df['hr_zero_crossings'] = output_df['hr'].rolling(window=window_size, min_periods=1).apply(zero_crossings, raw=True)


  output_df['hr_mean'] = output_df['hr'].rolling(window=window_size, min_periods=1).mean()
  output_df['hr_std'] = output_df['hr'].rolling(window=window_size, min_periods=1).std()
  output_df['hr_min'] = output_df['hr'].rolling(window=window_size, min_periods=1).min()
  output_df['hr_max'] = output_df['hr'].rolling(window=window_size, min_periods=1).max()
  output_df['hr_kurtosis'] = output_df['hr'].rolling(window=window_size, min_periods=1).apply(kurtosis, raw=True)
  output_df['hr_skew'] = output_df['hr'].rolling(window=window_size, min_periods=1).apply(skew, raw=True)
  output_df['hr_zero_crossings'] = output_df['hr'].rolling(window=window_size, min_periods=1).apply(zero_crossings, raw=True)


In [63]:
for col in all_columns:
    output_df[f'{col}_fft_mean'] = output_df[col].rolling(window=window_size, min_periods=1).apply(lambda x: fft_features(x), raw=True)


  return np.abs(fft_coeff[:len(fft_coeff)//2]).mean()  # 只保留前半部分系数的均值
  output_df[f'{col}_fft_mean'] = output_df[col].rolling(window=window_size, min_periods=1).apply(lambda x: fft_features(x), raw=True)
  return np.abs(fft_coeff[:len(fft_coeff)//2]).mean()  # 只保留前半部分系数的均值
  output_df[f'{col}_fft_mean'] = output_df[col].rolling(window=window_size, min_periods=1).apply(lambda x: fft_features(x), raw=True)
  return np.abs(fft_coeff[:len(fft_coeff)//2]).mean()  # 只保留前半部分系数的均值
  output_df[f'{col}_fft_mean'] = output_df[col].rolling(window=window_size, min_periods=1).apply(lambda x: fft_features(x), raw=True)
  return np.abs(fft_coeff[:len(fft_coeff)//2]).mean()  # 只保留前半部分系数的均值
  output_df[f'{col}_fft_mean'] = output_df[col].rolling(window=window_size, min_periods=1).apply(lambda x: fft_features(x), raw=True)
  return np.abs(fft_coeff[:len(fft_coeff)//2]).mean()  # 只保留前半部分系数的均值
  output_df[f'{col}_fft_mean'] = output_df[col].rolling(window=window_size, min_periods=1).apply(lambda x: fft_fea

In [69]:
print(output_df.head())
rows_with_nan = output_df[output_df.isna().any(axis=1)]

output_df = output_df.dropna()

      Unnamed: 0     hr   f_acc_x   f_acc_y   f_acc_z   f_gyr_x   f_gyr_y  \
0  1717788245600  112.0  6.098917 -2.142345  7.317383  0.005079 -0.059492   
1  1717788245700  112.0  6.145470 -1.760938  7.425608  0.003556 -0.087058   
2  1717788245800  112.0  6.150560 -1.911375  7.400910 -0.000865 -0.104507   
3  1717788245900  112.0  6.146518 -1.787283  7.288344  0.004800 -0.129291   
4  1717788246000  112.0  6.264323 -2.213297  7.088958  0.019771 -0.244786   

    f_gyr_z   f_loc_h   f_loc_v  ...  f_loc_d_fft_mean  w_loc_h_fft_mean  \
0 -0.016369  0.165778  1.978385  ...               NaN               NaN   
1 -0.031184  0.165778  1.978385  ...        683.689270            5.9628   
2 -0.042368  1.533325  1.978385  ...       1025.533905            8.9442   
3 -0.041913  2.900873  1.978385  ...        683.689270            5.9628   
4 -0.086265  4.268420  1.978385  ...        854.611588            7.4535   

   w_loc_v_fft_mean  w_loc_d_fft_mean  f_mag_x_fft_mean  f_mag_y_fft_mean  \
0  

In [70]:
# X = output_df.drop(['act', 'Unnamed: 0', 'f_acc_x','f_acc_y', 'f_acc_z', 'f_gyr_x','f_gyr_y','f_gyr_z'], axis=1) # features
X = output_df.drop(['act', 'Unnamed: 0', 'w_loc_h','w_loc_v','w_loc_d'], axis=1) # features
# X = output_df[['hr', 'f_acc_x','f_acc_y', 'f_acc_z', 'f_gyr_x','f_gyr_y','f_gyr_z','f_mag_x', 'f_mag_y', 'f_mag_z','f_loc_h','f_loc_v','f_loc_d','w_gyr_x','w_gyr_y','w_gyr_z','w_acc_x','w_acc_y','w_acc_z','w_mag_x', 'w_mag_y','w_mag_z']]
y = output_df['act'] # label'
# 'f_loc_h','f_loc_v','f_loc_d' 没用 'w_loc_h','w_loc_v','w_loc_d'


In [77]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [79]:

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)


selector = SelectFromModel(rf, threshold='mean', prefit=True)
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)


selected_features = X.columns[selector.get_support()]
print("Number of features before selection:", X.shape[1])
print("Number of features after selection:", X_train_selected.shape[1])
print("Selected features:", selected_features.tolist())

Number of features before selection: 225
Number of features after selection: 64
Selected features: ['f_loc_v', 'f_acc_x_mean', 'f_acc_x_std', 'f_acc_y_zero_crossings', 'w_acc_x_mean', 'w_acc_x_min', 'w_acc_x_max', 'w_acc_y_mean', 'w_acc_y_max', 'w_acc_y_zero_crossings', 'w_acc_z_mean', 'w_acc_z_max', 'w_acc_z_zero_crossings', 'f_gyr_x_mean', 'f_gyr_x_max', 'f_gyr_x_skew', 'f_gyr_y_mean', 'f_gyr_y_max', 'f_gyr_z_mean', 'f_gyr_z_max', 'w_gyr_x_min', 'w_gyr_y_mean', 'w_gyr_y_min', 'f_loc_v_mean', 'f_loc_v_min', 'f_loc_v_max', 'w_loc_h_max', 'w_loc_v_mean', 'w_loc_v_min', 'w_loc_d_mean', 'w_loc_d_std', 'w_loc_d_min', 'w_loc_d_max', 'f_mag_x_mean', 'f_mag_x_std', 'f_mag_x_min', 'f_mag_x_max', 'f_mag_y_std', 'f_mag_y_min', 'f_mag_y_max', 'f_mag_y_zero_crossings', 'f_mag_z_mean', 'f_mag_z_std', 'f_mag_z_min', 'f_mag_z_max', 'w_mag_x_mean', 'w_mag_x_min', 'w_mag_x_max', 'w_mag_y_min', 'w_mag_z_mean', 'w_mag_z_std', 'w_mag_z_min', 'w_mag_z_max', 'w_mag_z_zero_crossings', 'hr_max', 'f_acc_x_fft_



In [80]:
knn_classifier = KNeighborsClassifier(n_neighbors=20)
knn_classifier.fit(X_train_selected, y_train)

In [82]:
y_pred = knn_classifier.predict(X_test_selected)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9865384615384616
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98        49
           1       0.96      1.00      0.98       130
           2       1.00      1.00      1.00       196
           3       1.00      0.89      0.94        45
           4       0.99      0.99      0.99       100

    accuracy                           0.99       520
   macro avg       0.99      0.97      0.98       520
weighted avg       0.99      0.99      0.99       520



## no feature engineering

In [86]:
df = pd.read_csv("output.csv")
X = df.drop(['act', 'Unnamed: 0', 'w_loc_h','w_loc_v','w_loc_d'], axis=1) # features
y = df['act']  # 目标变量
df['act'] = df['act'].map({'Others': 0, 'Normal': 1, 'Turning': 2,'Crouching': 3, 'Falling': 4})

In [87]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [90]:
knn_classifier = KNeighborsClassifier(n_neighbors=20)
knn_classifier.fit(X_train, y_train)

In [91]:
y_pred = knn_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9136276391554703
Classification Report:
               precision    recall  f1-score   support

   Crouching       0.89      0.98      0.93        48
     Falling       0.93      0.85      0.89        94
      Normal       0.92      0.95      0.94       131
      Others       0.92      0.69      0.79        51
     Turning       0.91      0.96      0.93       197

    accuracy                           0.91       521
   macro avg       0.91      0.89      0.90       521
weighted avg       0.91      0.91      0.91       521

