In [6]:
from scipy import io
import numpy as np

def get_feature_dict(filename):
    """将ninapro_feature的MAT文件加载为字典

    Args:
        path: mat文件路径

    Returns:
        数据集字典
        [feat_set, featStim, featRep]
    """
    # 读取MAT文件
    print('load file: ' + filename + '...', end= '', flush=True)
    dict_feature=io.loadmat(filename)
    if (dict_feature != ()):
        #print(ninapro_data.keys())
        print('[ok]:%d'%(len(dict_feature['featStim'])), flush=True)
    # 返回字典
    return dict_feature

def split_zeros(feature_dict,feature_name,channels):
    """将ninapro_feature数据集中【restimulate】为0的部分（受试者不做动作）从数据集中去除

    Args:
        feature_dict: 数据集字典
        feature_name: 待处理的数据的keyvalue
        channels: 待处理的数据的通道数

    Returns:
        [feature_split, labels] 去除0部分的数据，对于的label(numpy array)
    """
    feature_split = None
    index = []
    for i in range(len(feature_dict['featStim'])):
        if feature_dict['featStim'][i]!=0:
            index.append(i)
    # 重排元素
    emg_temp = feature_dict[feature_name]
    emg_temp = np.reshape(emg_temp,(-1,4,channels))
    emg_temp = np.swapaxes(emg_temp,1,2)
    # 去除0label
    if(feature_split is None):
        feature_split = emg_temp[index,:,:]
        labels = feature_dict['featStim'][index,:]
    else:
        feature_split = np.vstack((feature_split,emg_temp[index,:,:])) 
        labels = np.vstack((labels,feature_dict['featStim'][index,:]))
    return feature_split, labels

# 对多组数据合并，预处理
def merge_multisubject(b,e):
    """将多组数据从mat文件中提取出来，预处理后合并

    Args:
        b: 开始的受试者序号
        e: 结束的受试者序号

    Returns:
        [emg,acc,gyro,mag,labels]肌电c12，加速度c36，角速度c36，磁强c36数据和标签。
    """
    emg_feature = None
    labels = None
    # 遍历受试者序号
    for i in range(b,e+1):
        emg_dict = get_feature_dict("../feature/DWTfeature_S{0}.mat".format(i))
        # 寻找动作为0的元素并剔除
        emg,labels = split_zeros(emg_dict,'feat_set',12)
        #print('delete 0 label,',emg_temp[index,:,:].shape)
    # s = [1.28889041e-05, 0.00000000e+00, 1.72402617e+01, 1.57331247e+01, 2.11883893e-03]
    # 归一化
    # for i in range(5):
    #     #s[i] = np.sum(np.abs(emg_feature[:,:,i]))/emg_feature[:,:,i].size
    #     #print("avg=",s)
    #     if(s[i]!=0):
    #         emg_feature[:,:,i] /= s[i]
    #         emg_feature[:,:,i] -= 0.5*s[i]
    return emg, labels

# 读取2组数据分布作训练集和验证集
emg_feature,labels = merge_multisubject(1,1)
emg_feature_test,labels_test = merge_multisubject(2,2)
print(emg_feature.shape,labels.shape)
print(emg_feature_test.shape,labels.shape)



load file: ../feature/DWTfeature_S1.mat...[ok]:206751
load file: ../feature/DWTfeature_S2.mat...[ok]:206659
(97210, 12, 4) (97210, 1)
(105147, 12, 4) (97210, 1)


In [7]:
# 训练集预处理
emg_feature = np.reshape(emg_feature,(-1,48))
labels = labels.flatten() - 1
print(emg_feature.shape,labels.shape)

# 数据集预处理
emg_feature_test = np.reshape(emg_feature_test,(-1,48))
labels_test = labels_test.flatten() - 1
print(emg_feature_test.shape,labels_test.shape)

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# 随机森林训练
print('training...')
model = RandomForestClassifier(random_state=0,max_depth = 10)
model.fit(emg_feature, labels)

# train_acc
score_a = model.score(emg_feature,labels)
print(score_a)
# 随机森林验证
score_t = model.score(emg_feature_test,labels_test)
print(score_t)

-a----         2022/12/7     22:19           7402 ninapro_utils.py
-a----        2022/12/13      2:40          19014 sEMG_proc_CNNs.ipynb
-a----        2022/12/13      2:46         197845 sEMG_proc_dataExtraction.ipynb
-a----        2022/12/13     15:04           5972 sEMG_proc_DWT.ipynb
-a----        2022/12/13      1:57          54737 sEMG_proc_LFM.ipynb
-a----        2022/12/12     17:50          45352 semg_train.ipynb

(97210, 48) (97210,)
(105147, 48) (105147,)
training...
0.8795802900936117
0.12319895004137066
