# 0. Import Packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import os
import glob
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
import math

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from keras_preprocessing.sequence import TimeseriesGenerator
from keras import Sequential
from keras import layers
from keras.src.layers import LSTM, GRU, RNN, Dense, Flatten
from keras.src.optimizers.rmsprop import RMSprop
from keras.src.callbacks import ModelCheckpoint, EarlyStopping
from keras.src.models.model import model_from_json
from keras import optimizers

# 1. Load data

In [None]:
# Select the temperature folder to load; available values are
# [N10, 0, 10, 20, 25, 30, 40, 50]. Set it through `temp`.

temp = '20'
mode = ['DST', 'FUDS' ,'US06']
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Load every file found under <parent>/DB Preprocessing/refined_data/<temp>/<mode>
# and publish it as a module-level DataFrame named csv_<mode>_<num>.
for m in mode:
    data_dir = os.path.join(parent_dir, f'DB Preprocessing/refined_data/{temp}/{m}')
    for file_name in os.listdir(data_dir):
        csv_dir = os.path.join(data_dir, file_name)
        # NOTE(review): any file whose name does not contain '007' is treated
        # as battery '008', so stray files in the folder would overwrite it.
        num = '007' if '007' in file_name else '008'
        # read_csv already returns a DataFrame; no extra wrapping is needed.
        globals()[f'csv_{m}_{num}'] = pd.read_csv(csv_dir)
        print(f'csv_{m}_{num} 생성 완료')

In [None]:
# Display the DST frame loaded for battery 007 as the cell output (quick sanity check).
csv_DST_007

# 2. Separate inputs (V, I, T) and target (SoC)

In [None]:
# Battery identifiers to iterate over. This rebinds `num`, which the loading
# cell used as a single string, to a list.
num = ['007','008']

def df2numpy(df):
    """Return the contents of a pandas object as a NumPy array.

    Args:
        df: A pandas DataFrame (or any object exposing ``to_numpy()``).

    Returns:
        numpy.ndarray holding the same values as ``df``.
    """
    # The previous implementation bound the result to a local called `np`,
    # shadowing the NumPy module alias inside this function.
    return df.to_numpy()

# For every (drive cycle, battery) pair, split the loaded frame into the model
# inputs (current, voltage, temperature) and the SoC target, and publish both
# as module-level NumPy arrays named input_<mode>_<num> / output_<mode>_<num>.
for m in mode:
    for n in num:
        frame = globals()[f'csv_{m}_{n}']

        features = frame[['Current(A)', 'Voltage(V)', 'Temperature (C)_1']]
        globals()[f'input_{m}_{n}'] = df2numpy(features)
        print(f'input_{m}_{n} 생성 완료')

        # Double brackets keep the target two-dimensional (one column).
        target = frame[['SoC']]
        globals()[f'output_{m}_{n}'] = df2numpy(target)
        print(f'output_{m}_{n} 생성 완료')

In [None]:
# Print the array shape, then display the DST input array for battery 007.
print(input_DST_007.shape)
input_DST_007

In [None]:
# Print the array shape, then display the US06 input array for battery 008.
print(input_US06_008.shape)
input_US06_008

# 3. Split train and test dataset

In [None]:
def create_dataset(input, output, look_back=1):
    """Build sliding-window samples and their aligned targets.

    Window ``i`` is ``input[i:i + look_back]`` and its target is
    ``output[i + look_back - 1]``, i.e. the output row at the last time step
    of the window.

    Args:
        input: Sequence of feature rows. The name shadows the builtin but is
            kept so existing keyword callers continue to work.
        output: Sequence of target rows, indexed in step with ``input``; each
            row must hold a single value for the final reshape to succeed.
        look_back: Window length in time steps; must be at least 1.

    Returns:
        Tuple ``(dataX, dataY)`` of NumPy arrays. ``dataX`` stacks the
        windows; ``dataY`` has shape ``(n_windows, 1, 1)``. When ``input`` is
        shorter than ``look_back`` both arrays are empty.

    Raises:
        ValueError: If ``look_back`` is smaller than 1. Such values used to
            produce empty or misaligned windows silently.
    """
    if look_back < 1:
        raise ValueError(f'look_back must be a positive integer, got {look_back}')

    # range() is empty when the input is shorter than one window.
    starts = range(len(input) - look_back + 1)
    dataX = [input[start:start + look_back] for start in starts]
    dataY = [output[start + look_back - 1] for start in starts]

    dataY = np.reshape(dataY, (len(dataY), 1, 1))
    # Shape echo kept from the original implementation so notebook output is unchanged.
    print(dataY.shape)
    return np.array(dataX), dataY

In [None]:
# Window length in time steps (a natural number; 20 is the usual setting).
look_back = 20

# Sliding-window samples per drive cycle (DST, US06, FUDS) and battery (007, 008).
dstX_7, dstY_7 = create_dataset(input_DST_007, output_DST_007, look_back=look_back)
dstX_8, dstY_8 = create_dataset(input_DST_008, output_DST_008, look_back=look_back)
us06X_7, us06Y_7 = create_dataset(input_US06_007, output_US06_007, look_back=look_back)
us06X_8, us06Y_8 = create_dataset(input_US06_008, output_US06_008, look_back=look_back)
fudsX_7, fudsY_7 = create_dataset(input_FUDS_007, output_FUDS_007, look_back=look_back)
fudsX_8, fudsY_8 = create_dataset(input_FUDS_008, output_FUDS_008, look_back=look_back)

# Sanity check: sample and target shapes for DST / battery 007, one per line.
print(dstX_7.shape, dstY_7.shape, sep='\n')

# 4. Training with GRU

In [None]:
# Stacked GRU regressor: each sample is a window of `look_back` time steps with
# 3 features (current, voltage, temperature); the output is one value.
model = Sequential()
# Declare the input shape with an explicit Input layer instead of the legacy
# `input_shape=` keyword on the first recurrent layer.
model.add(layers.Input(shape=(look_back, 3)))
model.add(GRU(512, return_sequences=True, activation='relu'))
model.add(GRU(128, return_sequences=True, activation='relu'))
model.add(GRU(8, return_sequences=False))
model.add(Dense(1))

# optimizer: the optimisation algorithm. Options include SGD, RMSprop, Adagrad,
#   Adadelta and Adam (Adam is the most commonly used).
# loss: compares the ground truth with the prediction; MSE is a regression loss.
#   The loss could be customised to improve performance, but MSE is used here.
# metrics: evaluation measures reported when assessing the model.
model.compile(optimizer = 'adam', loss = 'mse', metrics = ['mse', 'mae'])

In [None]:
# Train the model.
# epochs: number of passes over the full dataset; too few or too many can cause
#   underfitting or overfitting. epochs = 40 means 40 full passes.
# batch_size: number of samples fed per step.
#   Example: with 2000 samples, epochs=20 and batch_size=500, one epoch consists
#   of 4 iterations of 500-sample batches, i.e. 20 passes over the dataset and
#   80 iterations in total.
# verbose: controls progress output; 2 prints one line per epoch. It can be left
#   off if scrolling through the log is a nuisance.
# shuffle: shuffles the sample order during training. In a comparison, True
#   gave the lower error.

epochs = 20
batch_size = 64
verbose = 0
shuffle = True
early_stopping = EarlyStopping()  # library-default settings

# Options shared by every fit call below.
fit_options = dict(
    epochs=epochs,
    batch_size=batch_size,
    verbose=verbose,
    shuffle=shuffle,
    callbacks=[early_stopping],
)

# The same `model` instance is fit four times in sequence (DST and US06 for each
# battery); the FUDS windows of the matching battery serve as validation data.
history_dst_7 = model.fit(dstX_7, dstY_7, validation_data=(fudsX_7, fudsY_7), **fit_options)
history_us06_7 = model.fit(us06X_7, us06Y_7, validation_data=(fudsX_7, fudsY_7), **fit_options)
history_dst_8 = model.fit(dstX_8, dstY_8, validation_data=(fudsX_8, fudsY_8), **fit_options)
history_us06_8 = model.fit(us06X_8, us06Y_8, validation_data=(fudsX_8, fudsY_8), **fit_options)

In [None]:
# Save the model architecture as JSON under <cwd>/model/<temp>/.
model_dir = os.path.join(current_dir, 'model', temp)
# Create the target directory on first use so the writes below cannot fail
# merely because the folder is missing.
os.makedirs(model_dir, exist_ok=True)
model_json = model.to_json()
# The context manager guarantees the file is flushed and closed.
with open(f'{model_dir}/{temp}_model.json', 'w') as json_file:
    json_file.write(model_json)

# Save the model's learned weights next to the architecture file.
model.save_weights(f'{model_dir}/{temp}.weights.h5', overwrite=True)

# 5. Result

In [None]:
# Load the trained model: rebuild the architecture from the saved JSON.
# The context manager closes the file even if reading raises.
with open(f'{model_dir}/{temp}_model.json', "r") as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Restore the learned weights into the rebuilt model.
# NOTE(review): the prediction cell further down calls `model.predict`, not
# `loaded_model.predict` — confirm which model is meant to be evaluated.
loaded_model.load_weights(f'{model_dir}/{temp}.weights.h5')
print("Loaded model from disk")

In [None]:
# Training vs. validation loss for the DST fits, one panel per battery.
# (The US06 fit histories are not plotted here.)
plt.figure(figsize=(10,5))
loss_panels = [(history_dst_7, "007_loss"), (history_dst_8, "008_loss")]
for position, (fit_history, panel_title) in enumerate(loss_panels, start=1):
    plt.subplot(1, 2, position)
    curves = fit_history.history
    plt.plot(curves['loss'], label='train')
    plt.plot(curves['val_loss'], label='test')
    plt.title(panel_title)
    plt.legend()
plt.show()

In [None]:
# Plot predicted SoC against the reference SoC on the FUDS windows.
fudsY_7_hat = model.predict(fudsX_7)
fudsY_8_hat = model.predict(fudsX_8)
# Flatten the targets from (N, 1, 1) to (N, 1) for plotting and for the
# metric computation that follows.
fudsY_7 = np.reshape(fudsY_7, (fudsY_7.shape[0], 1))
fudsY_8 = np.reshape(fudsY_8, (fudsY_8.shape[0], 1))

plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
plt.plot(fudsY_7_hat, label='Prediction')
plt.plot(fudsY_7, label='Real data')
plt.title("007_SoC")
# The legend was missing on this panel, so its curve labels were never shown.
plt.legend()

plt.subplot(1,2,2)
plt.plot(fudsY_8_hat, label='Prediction')
plt.plot(fudsY_8, label='Real data')
plt.title("008_SoC")
plt.legend()
plt.show()

In [None]:
# RMSE and MAE of the FUDS predictions for batteries 007 and 008.

# Battery 007
rmse, mae = (
    math.sqrt(mean_squared_error(fudsY_7, fudsY_7_hat)),
    mean_absolute_error(fudsY_7, fudsY_7_hat),
)
print('Test 007 RMSE: %.3f' % rmse)
print('Test 007 MAE: %.3f' % mae)

# Battery 008 (reuses the `rmse` / `mae` names, as before)
rmse, mae = (
    math.sqrt(mean_squared_error(fudsY_8, fudsY_8_hat)),
    mean_absolute_error(fudsY_8, fudsY_8_hat),
)
print('Test 008 RMSE: %.3f' % rmse)
print('Test 008 MAE: %.3f' % mae)