tsfresh提取特征

In [12]:
import joblib
import pandas as pd
import numpy as np
from tsfresh import select_features
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters
from tsfresh.feature_extraction import ComprehensiveFCParameters
from joblib import Parallel, delayed
import os

In [13]:
# Hand-picked tsfresh feature-calculator settings.
# Each key is a calculator name; the value is None when the calculator is
# requested without parameters, otherwise a list with one parameter dict per
# requested variant.
efficient_parameters = dict(
    length=None,
    abs_energy=None,
    standard_deviation=None,
    cid_ce=[{'normalize': flag} for flag in (True, False)],
    ar_coefficient=[{'coeff': 1, 'k': 10}],
    fft_aggregated=[{'aggtype': kind} for kind in ('centroid', 'kurtosis')],
    # Deciles from 0.1 to 0.9; note that 0.5 is not in the list.
    quantile=[{'q': q} for q in (0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9)],
    # Lags 6 through 9 inclusive.
    autocorrelation=[{'lag': lag} for lag in range(6, 10)],
    agg_autocorrelation=[{'f_agg': 'median'}],
    approximate_entropy=[{'m': 2, 'r': r} for r in (0.1, 0.3, 0.5, 0.7, 0.9)],
    linear_trend=[
        {'attr': name}
        for name in ('pvalue', 'rvalue', 'intercept', 'slope', 'stderr')
    ],
)

# Alias used by the extraction calls further down the notebook.
settings = efficient_parameters

In [19]:
def gen_tsfresh_feature_basis(sensor, settings, n_jobs=48):
    '''
    Extract the tsfresh features described by ``settings`` from one sensor table.

    Args:
        sensor: Data passed unchanged as the first argument of
            ``extract_features``. The call names ``id`` as the series
            identifier column and ``sort_col`` as the ordering column, so both
            must be present.
        settings (dict): Features to extract. When it contains the key
            ``'vibration_1'`` it is passed as ``kind_to_fc_parameters``
            (per-kind settings); otherwise it is passed as
            ``default_fc_parameters``.
        n_jobs (int): Number of worker processes handed to
            ``extract_features``. Defaults to 48, the value that used to be
            hard-coded here.

    Returns:
        The object returned by ``extract_features``.
    '''
    # The presence of a 'vibration_1' key is the marker for per-kind settings.
    per_kind = 'vibration_1' in settings
    if per_kind:
        fc_kwargs = {'kind_to_fc_parameters': settings}
    else:
        fc_kwargs = {'default_fc_parameters': settings}

    return extract_features(
        sensor,
        column_id='id',
        column_sort='sort_col',
        n_jobs=n_jobs,
        # Kept from the original: the progress bar is hidden only when
        # per-kind settings are used.
        disable_progressbar=per_kind,
        **fc_kwargs
    )


In [20]:
def gen_tsfresh_feature_parallel(train_no, csv_nos, settings):
    '''
    Extract tsfresh features for every sensor file of one PLC and save them.

    For each ``i`` in ``1..csv_nos`` the file
    ``./sensors_scale_cur/<train_no>/<i>.lz4`` is loaded with joblib, run
    through ``gen_tsfresh_feature_basis`` and the result is dumped
    (lz4-compressed) to ``./sensor_tsfresh_efficient/<train_no>/<i>.lz4``.
    The output directory is created if it does not exist.

    Args:
        train_no: Identifier of the PLC; used as the sub-directory name
            (e.g. ``'01'``).
        csv_nos (int): Number of sensor files belonging to that PLC. Files
            are numbered starting at 1.
        settings: Feature settings forwarded to ``gen_tsfresh_feature_basis``.

    Note:
        ``joblib.Parallel`` is invoked with ``n_jobs=1``, so the files are
        processed one after another.
    '''
    input_dir = './sensors_scale_cur/%s/' % train_no
    output_dir = './sensor_tsfresh_efficient/%s/' % train_no

    # makedirs creates any missing parent directory and tolerates an existing
    # one, which replaces the racy exists()-then-mkdir() sequence.
    os.makedirs(output_dir, exist_ok=True)

    def basis_func(idx):
        # NOTE(review): joblib.load unpickles the file; only use it on
        # trusted inputs.
        sensor = joblib.load(os.path.join(input_dir, '%d.lz4' % idx))
        features = gen_tsfresh_feature_basis(sensor, settings)
        joblib.dump(features, os.path.join(output_dir, '%d.lz4' % idx),
                    compress='lz4')

    Parallel(n_jobs=1, verbose=1)(
        delayed(basis_func)(i) for i in range(1, csv_nos + 1)
    )

In [21]:
# Extract and store features for PLC '01', which has 48 sensor files.
gen_tsfresh_feature_parallel(train_no='01', csv_nos=48, settings=settings)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.

Feature Extraction:   0%|          | 0/234 [00:00<?, ?it/s][A
Feature Extraction:   0%|          | 1/234 [00:17<1:08:45, 17.70s/it][A
Feature Extraction:   1%|▏         | 3/234 [00:17<22:53,  5.95s/it]  [A
Feature Extraction:   2%|▏         | 5/234 [00:17<13:42,  3.59s/it][A
Feature Extraction:   3%|▎         | 6/234 [00:18<11:32,  3.04s/it][A
Feature Extraction:   4%|▍         | 10/234 [00:18<06:56,  1.86s/it][A
Feature Extraction:   5%|▌         | 12/234 [00:18<05:50,  1.58s/it][A
Feature Extraction:   6%|▌         | 14/234 [00:19<04:59,  1.36s/it][A
Feature Extraction:   7%|▋         | 16/234 [00:19<04:21,  1.20s/it][A
Feature Extraction:   8%|▊         | 19/234 [00:19<03:38,  1.02s/it][A
Feature Extraction:   9%|▉         | 21/234 [00:19<03:18,  1.07it/s][A
Feature Extraction:  10%|▉         | 23/234 [00:19<03:02,  1.16it/s][A
Feature Extraction:  11%|█         | 25/234 [00:20<02:49,  1.23

KeyboardInterrupt: 

Process ForkPoolWorker-357:
Process ForkPoolWorker-350:
Process ForkPoolWorker-374:
Process ForkPoolWorker-348:
  File "/usr/local/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Process ForkPoolWorker-342:
Process ForkPoolWorker-355:
Process ForkPoolWorker-370:
Process ForkPoolWorker-346:
Process ForkPoolWorker-335:
Process ForkPoolWorker-372:
Process ForkPoolWorker-362:
Process ForkPoolWorker-377:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
Process ForkPoolWorker-356:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceb

In [None]:
# Extract and store features for PLC '02', which has 48 sensor files.
gen_tsfresh_feature_parallel(train_no='02', csv_nos=48, settings=settings)

In [None]:
# Extract and store features for PLC '03', which has 37 sensor files.
gen_tsfresh_feature_parallel(train_no='03', csv_nos=37, settings=settings)