In [6]:
import pandas as pd
import numpy as np
import joblib
from joblib import Parallel,delayed
from sklearn.preprocessing import scale
import os
import matplotlib.pyplot as plt

In [13]:
def concat_sensors(train_no, csv_nos):
    '''
    Load every numbered sensor file of one run and stack them row-wise.

    Args:
        train_no: Run identifier; names the sub-directory of
            ./sensors_tsfresh_comprehensive/ that is read.
        csv_nos: Number of files to load; files 1.lz4 .. <csv_nos>.lz4
            are read.

    Returns:
        The loaded objects concatenated along axis 0, with the index
        renumbered from 0.
    '''
    source_dir = './sensors_tsfresh_comprehensive/%s/'%train_no
    file_paths = [source_dir + '%d.lz4'%file_no for file_no in range(1, csv_nos+1)]
    # Load the files in parallel worker processes, one task per file.
    frames = Parallel(n_jobs=24, verbose=10)(delayed(joblib.load)(path) for path in file_paths)
    # ignore_index=True discards the per-file indices and numbers the rows 0..n-1.
    return pd.concat(frames, axis=0, ignore_index=True)

def concat_sensors_plc(train_no, csv_nos):
    '''
    Join selected PLC columns with the concatenated sensor data of one run.

    Args:
        train_no: Run identifier; selects ./test_plc_CL/test_<train_no>.lz4
            and is forwarded to concat_sensors.
        csv_nos: Number of sensor files, forwarded to concat_sensors.

    Returns:
        A DataFrame whose first columns are csv_no, CL, CLI and spindle_load
        from the PLC file, followed by all sensor columns.
    '''
    plc_path = './test_plc_CL/' + 'test_%s.lz4'%train_no
    plc_frame = joblib.load(plc_path)
    sensor_frame = concat_sensors(train_no, csv_nos)

    plc_columns = ['csv_no', 'CL', 'CLI', 'spindle_load']
    # An axis=1 concat matches rows by index label, not by position.
    # NOTE(review): the sensor frame has a fresh 0..n-1 index; this presumably
    # relies on the PLC frame having the same index and length - confirm.
    return pd.concat([plc_frame[plc_columns], sensor_frame], axis=1)
    

def filter_inf(df):
    '''
    Drop every row that contains an infinite value (+inf or -inf).

    Args:
        df: pandas DataFrame to clean. The caller's frame is not modified.

    Returns:
        A new DataFrame holding only the rows without any infinite value,
        with its index renumbered from 0.

    Side effects:
        Prints the number of rows that contained an infinite value.
    '''
    values = df.values
    # Check both signs: comparing with np.inf alone lets -inf through.
    # Equality is used instead of np.isinf so that an object-dtype array
    # (produced when some columns are non-numeric) is still handled.
    has_inf = ((values == np.inf) | (values == -np.inf)).any(axis=1)
    # Count affected rows, not inf cells, so the message matches its wording
    # even when a single row holds several infinite values.
    n_inf_rows = int(has_inf.sum())
    df = df[~has_inf].reset_index(drop=True)
    print('有%d行inf值'%n_inf_rows)
    return df

def filter_nan(df):
    '''
    Drop every row that contains a missing value.

    Args:
        df: pandas DataFrame to clean.

    Returns:
        A DataFrame holding only the rows without missing values, with its
        index renumbered from 0.

    Side effects:
        Prints the number of rows that contained a missing value.

    Note:
        np.nan == np.nan is False, so missing values are located with
        isnull() rather than with an equality comparison.
    '''
    row_has_nan = df.isnull().any(axis=1)
    kept = df.loc[~row_has_nan].reset_index(drop=True)
    print('有%d行nan值'%row_has_nan.sum())
    return kept


def write_file(train_no, csv_nos):
    '''
    Build the combined PLC + sensor table for one run, clean it and save it.

    The table is produced by concat_sensors_plc, rows containing inf values
    and then rows containing NaN values are removed, and the result is
    written to ./concats/concat_<train_no>.lz4 with lz4 compression.

    Args:
        train_no: Run identifier; used to locate the inputs and to name the
            output file.
        csv_nos: Number of sensor files of the run, forwarded to
            concat_sensors_plc.

    Returns:
        None. The result is written to disk.
    '''
    output_dir = './concats/'
    # exist_ok=True creates the directory only when needed, without the race
    # of a separate os.path.exists() check followed by os.mkdir().
    os.makedirs(output_dir, exist_ok=True)
    dev = concat_sensors_plc(train_no, csv_nos)
    dev = filter_inf(dev)
    dev = filter_nan(dev)
    joblib.dump(dev, output_dir + 'concat_%s.lz4'%train_no, compress='lz4')


In [14]:
# Build and save the cleaned table for runs 01-05, using 10 sensor files each.
for train_no in ('01', '02', '03', '04', '05'):
    write_file(train_no, 10)

[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Batch computation too fast (0.0093s.) Setting batch_size=42.
[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Batch computation too fast (0.0093s.) Setting batch_size=42.
[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=24)]: Using backend LokyBackend w

有0行inf值
有0行nan值
有0行inf值
有0行nan值
有0行inf值
有0行nan值


[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Batch computation too fast (0.0158s.) Setting batch_size=24.
[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Batch computation too fast (0.0098s.) Setting batch_size=40.
[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished


有0行inf值
有0行nan值
有0行inf值
有0行nan值


In [17]:
# import joblib
# test = joblib.load('./concats/concat_01.lz4')
# test.shape

In [18]:
# plc_01 = joblib.load('./plc_features/test_01.lz4')
# plc_01.head()