tsfresh提取特征

In [1]:
import joblib
import pandas as pd
import numpy as np
from tsfresh import select_features
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters
from tsfresh.feature_extraction import ComprehensiveFCParameters
from joblib import Parallel, delayed
import os

In [2]:
# Feature calculators that are requested without any parameters. In tsfresh's
# settings-dict convention such calculators map to None.
_parameter_free_features = [
    'length',
    'abs_energy',
    'mean',
    'mean_abs_change',
    'mean_change',
    'variance',
    'skewness',
    'kurtosis',
]

minimal_parameters = dict.fromkeys(_parameter_free_features)

# cid_ce is requested twice: once with normalize=True and once with normalize=False.
minimal_parameters['cid_ce'] = [{'normalize': flag} for flag in (True, False)]

# `settings` is the name the extraction cells below pass around; it is the
# same dict object as `minimal_parameters`, not a copy.
settings = minimal_parameters

In [3]:
def gen_tsfresh_feature_basis(sensor, settings, n_jobs=12):
    '''
    Extract the tsfresh features selected by ``settings`` from one sensor table.

    Args:
        sensor: Time-series container handed unchanged to
            ``tsfresh.extract_features``. It has to provide an ``id`` column
            and a ``sort_col`` column, because those names are passed as
            ``column_id`` and ``column_sort``.
            (presumably a pandas DataFrame -- confirm against the loader.)
        settings (dict): Features to extract. When the mapping contains the key
            ``'vibration_1'`` it is passed as ``kind_to_fc_parameters``
            (per-kind settings); otherwise it is passed as
            ``default_fc_parameters`` (one settings dict for every kind).
        n_jobs (int): Number of processes tsfresh is asked to use. Defaults to
            12, the value that used to be hard-coded. Lower it when this
            function is itself called from parallel workers.

    Returns:
        The object returned by ``tsfresh.extract_features``.
    '''
    # NOTE(review): per-kind settings are recognised only through the
    # 'vibration_1' key; a per-kind dict without that key takes the default
    # branch. Confirm that this is the intended contract.
    if 'vibration_1' in settings:
        settings_kwargs = {'kind_to_fc_parameters': settings}
        # The per-kind path has always run with the progress bar disabled.
        hide_progress = True
    else:
        settings_kwargs = {'default_fc_parameters': settings}
        # The default path has always run with the progress bar enabled.
        hide_progress = False

    return extract_features(
        sensor,
        column_id='id',
        column_sort='sort_col',
        n_jobs=n_jobs,
        disable_progressbar=hide_progress,
        **settings_kwargs
    )


In [4]:
def gen_tsfresh_feature_parallel(train_no, csv_nos, settings, n_jobs=10):
    '''
    Extract tsfresh features for several sensor files in parallel.

    For every index ``i`` in ``1..csv_nos`` the file
    ``./sensors_scale_cur/<train_no>/<i>.lz4`` is loaded with joblib, passed to
    ``gen_tsfresh_feature_basis`` and the result is written (lz4-compressed)
    to ``./sensor_tsfresh_minimal/<train_no>/<i>.lz4``.

    Args:
        train_no: Identifier of the PLC; used as the sub-directory name.
        csv_nos (int): Number of sensor files belonging to that PLC.
        settings (dict): Feature settings forwarded to
            ``gen_tsfresh_feature_basis``.
        n_jobs (int): Number of joblib workers. Defaults to 10, the value that
            used to be hard-coded.

    Returns:
        None. Results are only written to disk.
    '''
    input_dir = './sensors_scale_cur/%s/' % train_no
    output_dir = './sensor_tsfresh_minimal/%s/' % train_no

    # Creates the parent and the per-PLC directory in one call. exist_ok avoids
    # the check-then-create race of separate exists()/mkdir() steps.
    os.makedirs(output_dir, exist_ok=True)

    def basis_func(idx):
        file_name = '%d.lz4' % idx
        # joblib.load unpickles the file: only point this at trusted data.
        sensor = joblib.load(os.path.join(input_dir, file_name))
        features = gen_tsfresh_feature_basis(sensor, settings)
        joblib.dump(features, os.path.join(output_dir, file_name), compress='lz4')

    # NOTE: gen_tsfresh_feature_basis asks tsfresh for several processes of its
    # own, so the total process count is roughly n_jobs times that number.
    Parallel(n_jobs=n_jobs, verbose=10)(
        delayed(basis_func)(i) for i in range(1, csv_nos + 1)
    )

In [5]:
# PLC '01': process sensor files 1..48 with the feature settings defined above.
gen_tsfresh_feature_parallel(train_no='01', csv_nos=48, settings=settings)

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:   22.1s
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:   25.8s
[Parallel(n_jobs=10)]: Done  21 tasks      | elapsed:   41.8s
[Parallel(n_jobs=10)]: Done  34 out of  48 | elapsed:  1.0min remaining:   24.8s
[Parallel(n_jobs=10)]: Done  39 out of  48 | elapsed:  1.1min remaining:   15.2s
[Parallel(n_jobs=10)]: Done  44 out of  48 | elapsed:  1.3min remaining:    6.9s
[Parallel(n_jobs=10)]: Done  48 out of  48 | elapsed:  1.3min finished


In [6]:
# PLC '02': process sensor files 1..48 with the feature settings defined above.
gen_tsfresh_feature_parallel(train_no='02', csv_nos=48, settings=settings)

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:   21.3s
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:   39.1s
[Parallel(n_jobs=10)]: Done  21 tasks      | elapsed:   43.3s
[Parallel(n_jobs=10)]: Done  34 out of  48 | elapsed:  1.0min remaining:   25.4s
[Parallel(n_jobs=10)]: Done  39 out of  48 | elapsed:  1.1min remaining:   15.7s
[Parallel(n_jobs=10)]: Done  44 out of  48 | elapsed:  1.3min remaining:    6.9s
[Parallel(n_jobs=10)]: Done  48 out of  48 | elapsed:  1.3min finished


In [7]:
# PLC '03': process sensor files 1..37 with the feature settings defined above.
gen_tsfresh_feature_parallel(train_no='03', csv_nos=37, settings=settings)

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:   21.1s
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:   25.1s
[Parallel(n_jobs=10)]: Done  22 out of  37 | elapsed:   41.6s remaining:   28.4s
[Parallel(n_jobs=10)]: Done  26 out of  37 | elapsed:   49.2s remaining:   20.8s
[Parallel(n_jobs=10)]: Done  30 out of  37 | elapsed:   58.3s remaining:   13.6s
[Parallel(n_jobs=10)]: Done  34 out of  37 | elapsed:  1.0min remaining:    5.3s
[Parallel(n_jobs=10)]: Done  37 out of  37 | elapsed:  1.0min finished
