In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Install

In [None]:
!pip install openpyxl
!pip install pycaret

# Package

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.preprocessing import StandardScaler
import copy
import joblib
from pycaret.regression import *
import warnings
warnings.simplefilter('ignore')

In [None]:
# Run-level constants: the random seed and today's date as a YYYYMMDD string.
random_seed = 42
today = format(datetime.today(), "%Y%m%d")

# Function

In [None]:
def _parse_timestamp(value):
    """Turn a date string into a datetime; return any other value unchanged.

    Strings are first parsed with the strict "%Y-%m-%d %H:%M:%S" format. When
    that format does not match (for example fractional seconds or a date-only
    string) parsing falls back to ``pd.to_datetime`` instead of leaving the
    value as a plain string that cannot be advanced with a timedelta.
    """
    if not isinstance(value, str):
        # Already a Timestamp/datetime (or NaN): use it as-is.
        return value
    try:
        return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return pd.to_datetime(value)


def timeline(data:pd.DataFrame, start_tuple:tuple, last_tuple:tuple, time:bool)->pd.DataFrame :
    """Build an evenly spaced timestamp column between two cells of ``data``.

    Parameters
    ----------
    data : pd.DataFrame
        Frame whose raw values hold the first and last timestamp.
    start_tuple, last_tuple : tuple
        (row, column) positions in ``data.values`` of the first and last
        timestamp. Negative positions are allowed (e.g. ``(-1, 0)``).
    time : bool
        True for a one-minute step, False for a two-hour step.

    Returns
    -------
    pd.DataFrame
        Single column '날짜' running from the start value up to and including
        the last value when it falls on the grid. Empty when start > last.
    """
    cells = data.values
    current = _parse_timestamp(cells[start_tuple[0]][start_tuple[1]])
    last_date = _parse_timestamp(cells[last_tuple[0]][last_tuple[1]])

    # The step is loop-invariant, so pick it once.
    step = timedelta(minutes=1) if time else timedelta(hours=2)

    stamps = []
    while current <= last_date:
        stamps.append(current)
        current += step

    return pd.DataFrame(stamps, columns=['날짜'])

In [None]:
# Revised data reader: the timeline is derived from the dates stored in the input sheet.
def data_build_v1(root:str, start_tuple:tuple, last_tuple:tuple, column_number:int, time:bool)->dict :
    """Read every sheet of an Excel workbook and align it to a regular timeline.

    Parameters
    ----------
    root : str
        Path of the workbook; read with the openpyxl engine, all cells as str.
    start_tuple, last_tuple : tuple
        (row, column) positions in the raw sheet of the first and last
        timestamp, forwarded to ``timeline``.
    column_number : int
        Raw row index that holds the column headers; that row and all rows
        above it are removed from the data.
    time : bool
        Forwarded to ``timeline`` (True = one-minute step, False = two-hour step).

    Returns
    -------
    dict
        Maps the zero-based sheet position to the sheet right-merged onto its
        timeline on '날짜', so timeline rows with no measurement are kept as NaN.

    Raises
    ------
    Whatever ``pd.read_excel`` raises (e.g. FileNotFoundError). The previous
    sheet-probing loop swallowed these and silently returned an empty dict.
    """
    # sheet_name=None returns {sheet name: frame} for all sheets in workbook
    # order, so the file is parsed once instead of once per probed index.
    sheets = pd.read_excel(root, engine='openpyxl', dtype=str, sheet_name=None)

    data_dict = {}
    for cnt, data in enumerate(sheets.values()):
        fe_data = data.copy()
        # Promote the header row to column labels, then drop it and everything above.
        fe_data.columns = fe_data.values[column_number].tolist()
        fe_data = fe_data.drop(list(range(column_number+1)), axis=0)
        fe_data = fe_data.reset_index(drop=True)

        # First column holds the timestamps: parse them and round to whole seconds
        # so they can match the generated timeline exactly.
        fe_data[fe_data.columns[0]] = pd.to_datetime(fe_data[fe_data.columns[0]])
        fe_data[fe_data.columns[0]] = fe_data[fe_data.columns[0]].dt.round(freq='s')
        # Columns whose header cell was empty (NaN) are renamed to '날짜';
        # presumably that is only the timestamp column - verify against the sheet layout.
        fe_data = fe_data.rename(columns={np.nan : "날짜"})

        # The timeline is read from the raw sheet (not fe_data), so start/last
        # positions refer to the untouched layout.
        data_dict[cnt] = pd.merge(fe_data, timeline(data, start_tuple, last_tuple, time=time), on=['날짜'], how='right')

    return data_dict

In [None]:
def data_build_2(root:str, column_number:int)->dict :
    """Read every sheet of an Excel workbook into a cleaned frame.

    Parameters
    ----------
    root : str
        Path of the workbook; read with the openpyxl engine, all cells as str.
    column_number : int
        Raw row index whose values become the column labels.

    Returns
    -------
    dict
        Maps the zero-based sheet position to a frame in which raw rows 0 and 1
        are removed, the first column is named '날짜', the last three columns
        are dropped and every column from the third onwards is float64.

    Raises
    ------
    Whatever ``pd.read_excel`` raises (e.g. FileNotFoundError). The previous
    sheet-probing loop swallowed these and silently returned an empty dict.
    """
    # sheet_name=None returns {sheet name: frame} for all sheets in workbook
    # order, so the file is parsed once instead of once per probed index.
    sheets = pd.read_excel(root, engine='openpyxl', dtype=str, sheet_name=None)

    data_dict = {}
    for cnt, data in enumerate(sheets.values()):
        fe_data = data.copy()
        fe_data.columns = fe_data.values[column_number].tolist()
        # Rows 0 and 1 are always removed, independent of column_number.
        fe_data = fe_data.drop([0, 1], axis=0)
        fe_data = fe_data.reset_index(drop=True)
        fe_data.columns = ['날짜'] + fe_data.columns[1:].tolist()

        # Discard the three trailing columns, then make the value columns numeric
        # (the first two columns stay as read).
        fe_data = fe_data.drop(fe_data.columns[-3:], axis=1)
        fe_data[fe_data.columns[2:]] = fe_data[fe_data.columns[2:]].astype('float64')

        data_dict[cnt] = fe_data

    return data_dict

# Dataset

In [None]:
# Project root on the mounted Drive; later cells read from its input/ and output/ sub-folders.
data_path = '/content/drive/My Drive/Colab Notebooks/롯데케미칼'

In [None]:
# First workbook: header on raw row 2, one-minute timeline (time=True) bounded by
# the raw cells at (row 0, col 1) and (row 0, col 2).
fe_inf = data_build_v1(root = f'{data_path}/input/공정_데이터_sample.xlsx',
                   start_tuple = (0, 1), last_tuple = (0, 2), column_number=2, time=True)
# Second workbook: header taken from raw row 0.
lb_inf = data_build_2(root = f'{data_path}/input/물성_데이터_sample.xlsx',
                    column_number=0)

In [None]:
# One target name per forecast offset: 2, 4, ..., 12.
target_list = [f'RPS_after_{offset}' for offset in range(2, 13, 2)]

In [None]:
# Load the saved model and scaler for every target in target_list.
# NOTE(review): joblib.load unpickles arbitrary objects - only load files from a trusted location.
model_dict = {
    item: joblib.load(f'{data_path}/output/{item}_model.pkl')
    for item in target_list
}
scaler_dict = {
    item: joblib.load(f'{data_path}/output/{item}_scaler.pkl')
    for item in target_list
}

# Timeline Setting

In [None]:
# Parse the date column of the first lab sheet (this mutates lb_inf[0] in place).
lb_inf[0]['날짜'] = pd.to_datetime(lb_inf[0]['날짜'])
# Two-hour grid (time=False) from the first row's date to the last row's date.
inf_data = timeline(lb_inf[0], start_tuple = (0, 0), last_tuple = (-1, 0), time=False)
# Left merge keeps every grid timestamp; grid points without a lab row become NaN.
inf_data = pd.merge(inf_data, lb_inf[0], on=['날짜'], how='left')

In [None]:
# Disabled alternative kept as a string literal, so none of it is executed.
# It chose between merging the process data directly (when the first inference
# date equals the first date with a non-null 'TDDM FEED') and rebuilding seven
# two-hour rows starting at that first non-null date.
# NOTE(review): as the only expression in the cell the string is echoed as cell
# output; consider deleting it once it is no longer needed.
'''
# 수집된 데이터의 미래를 예측할 경우
if inf_data['날짜'][0] == fe_inf[0].dropna(subset='TDDM FEED', axis=0)['날짜'].iloc[0] :
  inf_data = pd.merge(inf_data, fe_inf[0], on=['날짜'], how='left')
  start = (inf_data['날짜'][0] - fe_inf[0]['날짜'][0]).components[0]*12 + (inf_data['날짜'][0] - fe_inf[0]['날짜'][0]).components[1]
  inf_data['product_time'] = [start + 2 * i for i in range(len(inf_data))]

# 수정된 데이터의 미래를 예측할 경우
else :
  mfy_list = []
  for i in range(7) :
    mfy_list.append(fe_inf[0][fe_inf[0]['날짜']==fe_inf[0].dropna(subset='TDDM FEED', axis=0)['날짜'].iloc[0] + timedelta(hours=2*i)].index[0])
  mfy_data = fe_inf[0].loc[mfy_list]
  mfy_data = mfy_data.reset_index(drop=True)
  start = (mfy_data['날짜'][0] - fe_inf[0]['날짜'][0]).components[0]*12 + (mfy_data['날짜'][0] - fe_inf[0]['날짜'][0]).components[1]
  mfy_data['product_time'] = [start + 2 * i for i in range(len(mfy_data))]
  inf_data[mfy_data.columns[1:]] = mfy_data[mfy_data.columns[1:]]
'''

In [None]:
# Attach the process measurements to the inference frame (left merge on '날짜').
# NOTE: the cell rebinds inf_data from itself, so re-running it merges the same
# columns a second time; restart from the timeline cell before re-running.
inf_data = pd.merge(inf_data, fe_inf[0], on=['날짜'], how='left')
# Offset between the first inference timestamp and the first process timestamp:
# Timedelta.components[0] is whole days, components[1] the remaining hours.
# NOTE(review): days are weighted by 12, hours are added unscaled and each row then
# advances by 2 - the units look inconsistent. Confirm this matches how
# product_time was built for the trained models before changing anything.
start = (inf_data['날짜'][0] - fe_inf[0]['날짜'][0]).components[0]*12 + (inf_data['날짜'][0] - fe_inf[0]['날짜'][0]).components[1]
inf_data['product_time'] = [start + 2 * i for i in range(len(inf_data))]

# Imputation

In [None]:
# Cast every column after the first one to float64 (raises if a value is not numeric).
inf_data[inf_data.columns[1:]] = inf_data[inf_data.columns[1:]].astype('float64')
# Fill missing values with the result of DataFrame.interpolate() using its defaults;
# existing values are left untouched.
inf_data[inf_data.columns[1:]] = inf_data[inf_data.columns[1:]].fillna(inf_data[inf_data.columns[1:]].interpolate())

# Lagging

In [None]:
# Columns carried into the feature frame. The lagging cell skips the last entry
# ('product_time'), so every other column listed here gets lagged copies.
fe_list = ['TDDM FEED', '산방제 FEED', '개시제 FEED', 'RUBBER SOLUTION FEED',
       'PL FEED RATE', 'R-1 중량', 'R-1 압력', 'R-1 온도', 'R-1 REFLUX',
       'R-1 AGITATOR RPM', 'R-2 중량', 'R-2 압력', 'R-2 온도', 'R-2 REFLUX',
       'R-2 AGITATOR RPM', 'DV-1 온도', 'ZAPPER 온도',
       'DV-1 JACKET 온도', 'DV-1 진공도', 'RECYCLE 후단 압력', 'DV-2 PREHEATER 온도',
       'DV-2 진공도', 'DV-2 JACKET 온도', 'RPS', 'product_time']

In [None]:
# Keep the date plus the selected feature columns. The explicit copy makes en_data
# an independent frame, so the column assignments and in-place dropna that follow
# do not operate on a slice of inf_data (the SettingWithCopy pattern).
en_data = inf_data[['날짜']+fe_list].copy()

In [None]:
# For every feature except 'product_time', add six lagged copies named
# '<column>_before_<2*i>' that hold the value from i rows earlier (i = 1..6).
lag_columns = {
    f'{column}_before_{i*2}': en_data[column].shift(i)
    for column in en_data.columns
    if column in fe_list[:-1]
    for i in range(1, 7)
}
en_data = pd.concat([en_data, pd.DataFrame(lag_columns, index=en_data.index)], axis=1)

# Rows without a complete six-step history for 'TDDM FEED' are discarded.
en_data = en_data.dropna(subset=['TDDM FEED_before_12'], axis=0)
en_data = en_data.reset_index(drop=True)

# Inference

In [None]:
# Model inputs: everything in en_data except the date and the current RPS value.
inference = en_data.drop(columns=['날짜', 'RPS'])

result_list = []

for cnt, target in enumerate(target_list) :
  # The first three targets use the columns whose name contains '_12', the remaining
  # ones those containing '_6'; every 'RPS_before' column is always included.
  # NOTE(review): presumably mirrors the feature selection used at training time - confirm.
  lag_tag = '_12' if cnt <= 2 else '_6'
  cols = inference.columns[inference.columns.str.contains(lag_tag) | inference.columns.str.contains('RPS_before')]

  temp = inference[cols].copy()
  temp['product_time'] = inference['product_time']

  # Save
  # temp.to_csv(f'{data_path}/canvas/231218_{target}_inf.csv', index=False)

  # Scale with the scaler saved for this target, keeping index and column labels.
  temp = pd.DataFrame(scaler_dict[target].transform(temp), index=temp.index, columns=temp.columns)

  # Every row is scored, but only the prediction for the last row is kept.
  pred = model_dict[target]['trained_model'].predict(temp)
  result_list.append(pred[-1])

In [None]:
# Timestamp of the last row in en_data; history and forecast are both anchored on it.
last_stamp = en_data['날짜'].iloc[-1]

# Seven timestamps ending at last_stamp (two-hour spacing, ascending), joined with the RPS values in inf_data.
before_idx = sorted(last_stamp - timedelta(hours=2*i) for i in range(7))
before_result = pd.DataFrame(before_idx, columns=['날짜'])
before = before_result.merge(inf_data[['날짜', 'RPS']], on=['날짜'], how='left')

# Six timestamps after last_stamp (two-hour spacing), paired with the predictions in target order.
pred_idx = [last_stamp + timedelta(hours=2*i) for i in range(1, 7)]
final_result = pd.DataFrame(pred_idx, columns=['날짜'])
final_result['RPS'] = result_list

# History rows first, forecast rows after, with a fresh 0..n-1 index.
final_result = pd.concat([before, final_result], axis=0).reset_index(drop=True)

# Export

In [None]:
# final_result.to_csv(f'{data_path}/output/infer_result.csv', index=False)