tsfresh提取特征

In [1]:
import joblib
import pandas as pd
import numpy as np
from tsfresh import select_features
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters
from tsfresh.feature_extraction import ComprehensiveFCParameters
from joblib import Parallel, delayed
import os

In [2]:
# Calling ComprehensiveFCParameters() returns the complete dictionary of
# feature settings; here only a small hand-picked subset is used, with every
# selected feature name mapped to None.
minimal_parameters = dict.fromkeys(
    (
        'length',
        'abs_energy',
        'mean',
        'mean_abs_change',
        'mean_change',
        'variance',
        'skewness',
        'kurtosis',
    )
)
# `settings` is the name the extraction cells below pass around.
settings = minimal_parameters


In [3]:
def gen_tsfresh_feature_basis(sensor, settings, n_jobs=48):
    '''
    Description:
        Extract the tsfresh features selected by ``settings`` from one sensor
        table.
    Parameters:
        sensor: input handed unchanged to ``extract_features``. It is read
            with ``column_id='id'`` and ``column_sort='sort_col'``, so it has
            to provide those two columns.
        settings (dict): the features to extract. When it contains the key
            ``'vibration_1'`` it is forwarded as ``kind_to_fc_parameters``;
            otherwise it is forwarded as ``default_fc_parameters``.
        n_jobs (int): worker count passed to ``extract_features``. Defaults to
            48, the value that used to be hard-coded.
    Returns:
        Whatever ``extract_features`` returns for the given input.
    '''
    # The presence of a 'vibration_1' key is what marks `settings` as the
    # kind-keyed variant.
    per_kind = 'vibration_1' in settings
    if per_kind:
        fc_kwargs = {'kind_to_fc_parameters': settings}
    else:
        fc_kwargs = {'default_fc_parameters': settings}

    # The progress bar is hidden for kind-keyed settings and shown otherwise,
    # exactly as the two original call sites did.
    return extract_features(
        sensor,
        column_id='id',
        column_sort='sort_col',
        n_jobs=n_jobs,
        disable_progressbar=per_kind,
        **fc_kwargs
    )


In [4]:
def gen_tsfresh_feature_parallel(data_no, csv_nos, settings):
    '''
    Description:
        Extract tsfresh features for every sensor file of one PLC and save
        the results. Files ``./sensors_ad/<data_no>/<i>.lz4`` for
        ``i = 1 .. csv_nos`` are loaded with joblib, run through
        ``gen_tsfresh_feature_basis`` and written lz4-compressed to
        ``./sensors_tsfresh_comprehensive/<data_no>/<i>.lz4``.
    Parameters:
        data_no: which PLC to process; used as the sub-directory name.
        csv_nos (int): number of sensor files belonging to that PLC.
        settings (dict): features to extract, forwarded to
            ``gen_tsfresh_feature_basis``.
    Returns:
        None. The extracted features are only written to disk.
    '''
    input_dir = './sensors_ad/%s/' % data_no
    output_dir = './sensors_tsfresh_comprehensive/%s/' % data_no

    # makedirs creates any missing parent directory and does not fail when
    # the directory already exists, replacing the check-then-mkdir sequence.
    os.makedirs(output_dir, exist_ok=True)

    def basis_func(idx):
        # Load one sensor file, extract its features, store them under the
        # same file name in the output directory.
        sensor = joblib.load(input_dir + '%d.lz4' % idx)
        features = gen_tsfresh_feature_basis(sensor, settings)
        joblib.dump(features, output_dir + '%d.lz4' % idx, compress='lz4')

    # n_jobs=1 processes the files one after another.
    # NOTE(review): presumably kept sequential because the extraction step
    # already uses multiple workers itself - confirm before raising it.
    Parallel(n_jobs=1, verbose=10)(
        delayed(basis_func)(i) for i in range(1, csv_nos + 1)
    )

In [5]:
# Run the extraction for PLCs '01' through '05', in that order; each call
# is told to process 10 sensor files with the shared `settings`.
for plc_no in ('01', '02', '03', '04', '05'):
    gen_tsfresh_feature_parallel(plc_no, 10, settings)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
Feature Extraction: 100%|██████████| 237/237 [00:01<00:00, 187.26it/s]
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.5s remaining:    0.0s
Feature Extraction: 100%|██████████| 236/236 [00:00<00:00, 250.05it/s]
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   13.3s remaining:    0.0s
Feature Extraction: 100%|██████████| 237/237 [00:00<00:00, 247.92it/s]
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   19.2s remaining:    0.0s
Feature Extraction: 100%|██████████| 232/232 [00:00<00:00, 241.79it/s]
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   24.9s remaining:    0.0s
Feature Extraction: 100%|██████████| 238/238 [00:00<00:00, 262.97it/s]
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   30.5s remaining:    0.0s
Feature Extraction: 100%|██████████| 235/235 [00:00<00:00, 236.38it/s]
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   36.3s remaining:    0.0s
Feature Extra

In [3]:
# Optional sanity check (left disabled): load one saved feature file and
# look at its shape.
# import joblib
# test = joblib.load('./sensors_tsfresh_comprehensive/01/1.lz4')
# test.shape