In [1]:
import os
import pandas as pd
import numpy as np
import joblib
import datetime as dt

In [2]:
# Load the three PLC CSV files (train_add/01, 02 and 03), one frame each.
train_01_plc, train_02_plc, train_03_plc = map(
    pd.read_csv,
    (
        '../raw_data/train_add/01/PLC/plc.csv',
        '../raw_data/train_add/02/PLC/plc.csv',
        '../raw_data/train_add/03/PLC/plc.csv',
    ),
)

In [3]:
def process_time(plc_df):
    """Parse the ``time`` column of a PLC frame into datetimes.

    The column is parsed with the format ``%H:%M:%S:%f`` (the fractional
    part is separated by a colon).  The input frame is left untouched: a new
    frame is returned with the parsed column in the same position and a
    fresh 0..n-1 index.

    Parameters
    ----------
    plc_df : pandas.DataFrame
        Frame with a ``time`` column of ``HH:MM:SS:ffffff`` strings.

    Returns
    -------
    pandas.DataFrame
        Copy of ``plc_df`` with ``time`` converted to datetime and the index
        reset (old index dropped).  Row order is preserved.
    """
    parsed_time = pd.to_datetime(plc_df.time, format='%H:%M:%S:%f')
    # ``assign`` builds a new frame, so the caller's frame is not mutated.
    return plc_df.assign(time=parsed_time).reset_index(drop=True)

def CalRULR(offset, start, cur, total_life):
    '''
    Description:
        Remaining-useful-life ratio of one sample.

        offset is the segment number (counted from 1); start and cur are
        Timestamps; the consumed time is expressed in minutes.  The data of a
        segment is the last minute of its 5-minute window, hence the
        ``offset*5 - 1`` base.

    Returns:
        1 - consumed_minutes / total_life
    '''
    # Timedelta.value divided by 1e9*60 converts the elapsed time to minutes.
    minutes_into_segment = (cur - start).value / (1e9 * 60)
    minutes_used = (offset * 5 - 1) + minutes_into_segment
    return 1 - minutes_used / total_life

def CalCostLife(offset, start, cur, base_cost):
    '''
    Description:
        Effective time already spent (CL), in minutes.

        The time consumed up to ``cur`` inside segment ``offset`` is
        ``(offset*5 - 1)`` minutes plus the minutes elapsed since ``start``;
        ``base_cost`` is added on top of it.
    '''
    # Timedelta.value divided by 1e9*60 converts the elapsed time to minutes.
    minutes_into_segment = (cur - start).value / (1e9 * 60)
    minutes_used = (offset * 5 - 1) + minutes_into_segment
    return base_cost + minutes_used

def CalCostLifeInterval(offset, base_cost):
    '''
    Description:
        Effective time interval already spent (CLI, Cost Life Interval):
        ``base_cost`` plus 5 for every segment up to and including ``offset``.
    '''
    return base_cost + offset * 5

def TransTime(data_plc, csv_nos=48, base_cost=0, total_time=240):
    '''
    Description:
        Convert the timestamps of a PLC frame into the remaining-useful-life
        ratio (RULR), the consumed life (CL) and the consumed-life interval
        (CLI).

        For a row in segment ``csv_no`` the consumed time in minutes is
        ``(csv_no*5 - 1) + minutes elapsed since the segment's earliest time``
        and then:

            RULR = 1 - consumed / total_time
            CL   = base_cost + consumed
            CLI  = base_cost + csv_no * 5

        Every value is computed per row, so the result is correct whatever
        the row order of ``data_plc`` (rows do not have to be grouped by
        ascending ``csv_no``).

    Parameters:
        data_plc:   DataFrame with a datetime ``time`` column and a
                    ``csv_no`` column.  It is not modified.
        csv_nos:    number of sensor files belonging to this PLC file; only
                    rows whose ``csv_no`` is one of 1..csv_nos are kept.
        base_cost:  life already consumed before the first segment.
        total_time: total life.

    Returns:
        A new DataFrame holding the kept rows (original order and index)
        without the ``time`` column and with ``RULR``, ``CL`` and ``CLI``
        columns appended.

    Raises:
        ZeroDivisionError: if ``total_time`` is 0.
    '''
    if total_time == 0:
        # A vectorised division would silently yield inf/nan instead.
        raise ZeroDivisionError('total_time must be non-zero')

    # Keep exactly the rows that belong to one of the segments 1..csv_nos.
    in_range = data_plc['csv_no'].isin(np.arange(1, csv_nos + 1))
    segments = data_plc.loc[in_range]

    offset = segments['csv_no'].astype('int64')
    # Earliest timestamp of each segment, broadcast back onto its rows.
    seg_start = segments.groupby('csv_no', sort=False)['time'].transform('min')
    elapsed_min = (segments['time'] - seg_start) / pd.Timedelta(minutes=1)
    # The data of a segment is the last minute of its 5-minute window.
    cost = (offset * 5 - 1) + elapsed_min

    result = segments.drop(columns=['time'])
    result['RULR'] = 1 - cost / total_time
    result['CL'] = base_cost + cost
    result['CLI'] = base_cost + offset * 5
    return result

In [4]:
# NOTE: the raw frames are replaced by their time-parsed versions.
train_01_plc, train_02_plc, train_03_plc = (
    process_time(frame)
    for frame in (train_01_plc, train_02_plc, train_03_plc)
)

# In every call total_time equals csv_nos * 5 (240 = 48 * 5, 185 = 37 * 5).
data_01 = TransTime(train_01_plc, csv_nos=48, base_cost=0, total_time=240)
data_02 = TransTime(train_02_plc, csv_nos=48, base_cost=0, total_time=240)
data_03 = TransTime(train_03_plc, csv_nos=37, base_cost=0, total_time=185)  # train_03_plc has only 37 files

In [5]:
# Persist the three transformed frames.  The directory is named once and
# created with exist_ok=True, so re-running the cell is safe and there is no
# gap between an existence check and the creation.
output_dir = './train_plc_RULR'
os.makedirs(output_dir, exist_ok=True)
joblib.dump(data_01, os.path.join(output_dir, 'train_01_plc.lz4'))
joblib.dump(data_02, os.path.join(output_dir, 'train_02_plc.lz4'))
joblib.dump(data_03, os.path.join(output_dir, 'train_03_plc.lz4'))

['./train_plc_RULR/train_03_plc.lz4']