In [5]:
import os
import pandas as pd
import numpy as np
import joblib
import datetime as dt

In [6]:
# Load the PLC log of each of the five additional test runs (folders 01-05).
# The frames are unpacked in the same order as the paths are listed.
test_01_plc, test_02_plc, test_03_plc, test_04_plc, test_05_plc = map(
    pd.read_csv,
    (
        '../raw_data/test_add/01/PLC/plc.csv',
        '../raw_data/test_add/02/PLC/plc.csv',
        '../raw_data/test_add/03/PLC/plc.csv',
        '../raw_data/test_add/04/PLC/plc.csv',
        '../raw_data/test_add/05/PLC/plc.csv',
    ),
)

In [7]:
def process_time(plc_df):
    '''
    Parse the ``time`` column of a PLC frame and renumber its rows.

    Parameters:
        plc_df: DataFrame with a ``time`` column holding strings in the
            ``%H:%M:%S:%f`` layout (hours:minutes:seconds:fraction).

    Returns:
        A new DataFrame whose ``time`` column is datetime-typed and whose
        index is reset to 0..n-1. The frame passed in is NOT modified, so
        the cell that calls this function can be re-run safely.
    '''
    # Only a time of day is given, so pandas fills in its default date;
    # downstream code only uses differences between these timestamps.
    parsed_time = pd.to_datetime(plc_df['time'], format='%H:%M:%S:%f')
    # assign() builds a new frame instead of writing into the caller's one.
    return plc_df.assign(time=parsed_time).reset_index(drop=True)

def CalCostLife(offset,start,cur,base_cost):
    '''
    Description:
        Compute the effective time already spent (CL), in minutes.

    Parameters:
        offset: segment number; contributes ``offset*5-1`` minutes.
        start: timestamp at which the segment begins.
        cur: timestamp of the current sample.
        base_cost: minutes added on top of the computed cost.

    Returns:
        ``base_cost + (offset*5-1) + minutes elapsed between start and cur``.
    '''
    # (cur-start).value is read as nanoseconds, hence the 1e9*60 divisor
    # (assumes pandas Timestamp inputs -- TODO confirm for other callers).
    nanos_per_minute = 1e9*60
    elapsed_minutes = (cur-start).value/nanos_per_minute
    return base_cost + ((offset*5-1) + elapsed_minutes)

def CalCostLifeInterval(offset,base_cost):
    '''
    Description:
        Compute the cost-life interval (CLI, Cost Life Interval), i.e. the
        effective time already spent expressed per segment.

    Parameters:
        offset: segment number; each segment contributes 5 to the result.
        base_cost: value added on top of the segment contribution.

    Returns:
        ``base_cost + offset*5``.
    '''
    return base_cost + offset*5

def TransTime(data_plc, csv_nos=48, base_cost=0):
    '''
    Description:
        Convert the time column of a PLC frame into life-related features.

    Parameters:
        data_plc: DataFrame with a datetime ``time`` column and a ``csv_no``
            column giving the segment number of every row.
        csv_nos: number of sensor files that belong to this PLC file; only
            rows with ``1 <= csv_no <= csv_nos`` are kept.
        base_cost: minutes already spent before segment 1; added to both
            features.

    Returns:
        A new DataFrame holding the kept rows without the ``time`` column
        and with two extra columns:
            CL:  base_cost + (csv_no*5 - 1) + minutes elapsed since the
                 earliest timestamp of the row's own segment
                 (same formula as CalCostLife).
            CLI: base_cost + csv_no*5 (same formula as CalCostLifeInterval).
    '''
    # Keep exactly the segments the features are defined for.
    in_range = data_plc['csv_no'].between(1, csv_nos)
    kept = data_plc.loc[in_range]
    seg_no = kept['csv_no']

    # Earliest timestamp of each segment, broadcast back onto its rows.
    # Everything below is aligned on the row index, so the result is correct
    # even when the rows are not grouped or sorted by csv_no, and segments
    # that are absent from the data simply contribute no rows.
    seg_start = kept.groupby('csv_no')['time'].transform('min')
    elapsed_min = (kept['time'] - seg_start) / pd.Timedelta(minutes=1)

    result = kept.drop(columns=['time'])
    result['CL'] = base_cost + ((seg_no * 5 - 1) + elapsed_min)
    result['CLI'] = base_cost + seg_no * 5
    return result

In [8]:
# Parse the time column of every PLC frame (same order as they were loaded).
test_01_plc, test_02_plc, test_03_plc, test_04_plc, test_05_plc = [
    process_time(plc_frame)
    for plc_frame in (test_01_plc, test_02_plc, test_03_plc, test_04_plc, test_05_plc)
]

# Build the CL/CLI features from the first 10 segments of each run.
# The second tuple holds the per-run base_cost passed to TransTime
# (presumably the minutes already consumed before each test run -- TODO confirm).
data_01, data_02, data_03, data_04, data_05 = [
    TransTime(plc_frame, 10, run_base_cost)
    for plc_frame, run_base_cost in zip(
        (test_01_plc, test_02_plc, test_03_plc, test_04_plc, test_05_plc),
        (40, 70, 50, 70, 120),
    )
]

In [9]:
# Directory that receives the serialized CL/CLI feature frames.
output_dir = './test_plc_CL'
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of a separate os.path.exists() test; the same path is now used both
# for creating the directory and for writing into it.
os.makedirs(output_dir, exist_ok=True)
# One file per test run; joblib.dump returns the list of written paths,
# so the last call is what the cell displays.
joblib.dump(data_01,os.path.join(output_dir,'test_01_plc.lz4'))
joblib.dump(data_02,os.path.join(output_dir,'test_02_plc.lz4'))
joblib.dump(data_03,os.path.join(output_dir,'test_03_plc.lz4'))
joblib.dump(data_04,os.path.join(output_dir,'test_04_plc.lz4'))
joblib.dump(data_05,os.path.join(output_dir,'test_05_plc.lz4'))

['./test_plc_CL/test_05_plc.lz4']