訓練模型

In [271]:
#%%
from keras.models import Sequential
from keras.models import load_model, save_model
from keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from keras.optimizers import Adam
from keras import backend as K

from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import joblib
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import os

# Load the merged, sorted sensor log for one device (path is built from `device`).
device = 'L4'
SourceData = pd.read_csv(f"..//Data//MergedSorted//{device}_Merged_Sorted.csv")

# Discard rows with any missing value and renumber the rest 0..n-1.
# drop=True prevents the old row labels from being inserted as a stray
# "index" column, which a plain reset_index() would do.
SourceData = SourceData.dropna().reset_index(drop=True)

In [272]:
# Columns used by the first model, both as its look-back input and as the
# values it predicts: time of day plus avg/min/max sensor readings.
#
# Disabled candidates (not part of the list): 'Avg_Power(mW)' and the
# 'Std_{Avg,Min,Max}_{Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW)}' columns.
to_predict_features_model_1 = [
    # time of day
    'Hour', 'Minute',

    # averages (Avg_Power(mW) is left out)
    'Avg_Temperature(°C)', 'Avg_Humidity(%)', 'Avg_Sunlight(Lux)',

    # minima
    'Min_Temperature(°C)', 'Min_Humidity(%)', 'Min_Sunlight(Lux)', 'Min_Power(mW)',

    # maxima
    'Max_Temperature(°C)', 'Max_Humidity(%)', 'Max_Sunlight(Lux)', 'Max_Power(mW)',
]

# Both names refer to the very same list object.
input_features_model_1 = to_predict_features_model_1

In [273]:
def create_dataset(data, LookBackNum):
    """Slice a 2-D series into (look-back window, next row) training pairs.

    For every row index ``i`` from ``LookBackNum`` to ``len(data) - 1`` the
    window ``data[i-LookBackNum:i]`` becomes one sample of ``X`` and row
    ``data[i]`` becomes the matching sample of ``y``.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_features)
        Rows in chronological order.
    LookBackNum : int
        Number of consecutive rows in each window; must be >= 0.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, LookBackNum, n_features)
    y : numpy.ndarray, shape (n_samples, n_features)
        ``n_samples`` is ``max(len(data) - LookBackNum, 0)``. When it is 0 the
        arrays are empty but still have the documented rank, so callers that
        read ``X.shape[2]`` or ``y.shape[1]`` keep working.

    Raises
    ------
    ValueError
        If ``LookBackNum`` is negative.
    """
    if LookBackNum < 0:
        raise ValueError(f"LookBackNum must be >= 0, got {LookBackNum}")

    data = np.asarray(data)
    n_features = data.shape[1]
    target_rows = range(LookBackNum, len(data))

    # Not enough rows for even one window: return correctly shaped empties
    # instead of the 1-D arrays np.array([]) would produce.
    if len(target_rows) == 0:
        return (
            np.empty((0, LookBackNum, n_features), dtype=data.dtype),
            np.empty((0, n_features), dtype=data.dtype),
        )

    X = np.stack([data[i - LookBackNum:i, :] for i in target_rows])
    y = data[LookBackNum:, :].copy()
    return X, y
  
# Number of past rows the LSTM looks at per sample (one row = 10 minutes).
n_timesteps = LookBackNum = 48

# Keep only the columns the LSTM reads and predicts.
AllOutPut = SourceData[to_predict_features_model_1].values

# Min-max normalisation.
scaler = MinMaxScaler()

X_preprocess_pipe = make_pipeline(
    scaler,
    # PolynomialFeatures(degree=2)
)

# The target pipeline gets its OWN scaler instance. make_pipeline stores the
# object it is given rather than a copy, so sharing `scaler` between the two
# pipelines would mean that refitting either one silently refits the other.
y_preprocess_pipe = make_pipeline(
    MinMaxScaler(),
)

AllOutPut_X_Scaled = X_preprocess_pipe.fit_transform(AllOutPut)
AllOutPut_y_Scaled = y_preprocess_pipe.fit_transform(AllOutPut)

# Windows come from the X-scaled table, targets from the y-scaled table.
X_train, _ = create_dataset(AllOutPut_X_Scaled, LookBackNum=LookBackNum)
_, y_train = create_dataset(AllOutPut_y_Scaled, LookBackNum=LookBackNum)

n_features = X_train.shape[2]
n_prediction = y_train.shape[1]

# Make the (samples, timesteps, features) layout explicit:
# samples = number of training windows, timesteps = rows per window,
# features = columns per row.
X_train = np.reshape(X_train, (X_train.shape[0], n_timesteps, n_features))
X_train.shape

(5902, 48, 13)

In [274]:

#%%
#============================建置&訓練「LSTM模型」============================
#建置LSTM模型

# Custom loss function
def mae_mse_loss(mae_ratio=0.8):
    """Build a loss that blends absolute error and squared error.

    The returned callable computes the mean over all elements of
    ``mae_ratio * |y_true - y_pred| + (1 - mae_ratio) * (y_true - y_pred)**2``.

    Note: the compile call in this notebook passes ``loss='mse'``, so this
    factory is defined but not wired into the model there.
    """
    mse_ratio = 1 - mae_ratio

    def loss(y_true, y_pred):
        residual = y_true - y_pred
        blended = mae_ratio * K.abs(residual) + mse_ratio * K.square(residual)
        return K.mean(blended)

    return loss

regressor = Sequential()

# Recurrent stack: four 256-unit tanh LSTM layers. The first declares the
# input shape; every layer except the last emits the full sequence so the
# next LSTM can consume it.
regressor.add(LSTM(units=256, return_sequences=True, activation='tanh', input_shape=(n_timesteps, n_features)))
for emits_sequence in (True, True, False):
    regressor.add(LSTM(units=256, return_sequences=emits_sequence, activation='tanh'))

# Dense head: 64 hidden units, then one output per predicted feature.
regressor.add(Dense(units=64))
regressor.add(Dense(units=n_prediction))

regressor.compile(
    loss='mse',
    optimizer=Adam(learning_rate=0.001),
    metrics=['mae', 'mse'],
)

regressor.summary()

Model: "sequential_24"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_99 (LSTM)              (None, 48, 256)           276480    
                                                                 
 lstm_100 (LSTM)             (None, 48, 256)           525312    
                                                                 
 lstm_101 (LSTM)             (None, 48, 256)           525312    
                                                                 
 lstm_102 (LSTM)             (None, 256)               525312    
                                                                 
 dense_39 (Dense)            (None, 64)                16448     
                                                                 
 dense_40 (Dense)            (None, 13)                845       
                                                                 
Total params: 1,869,709
Trainable params: 1,869,709
Non-trainable params: 0

In [275]:
# Start training: 50 epochs over all windows, mini-batches of 32.
regressor.fit(x=X_train, y=y_train, batch_size=32, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50

In [None]:
# Save the trained network to a per-device .h5 file.
model_path = f'..//Model//WheatherLSTM_{device}.h5'
regressor.save(filepath=model_path)
print('Model Saved')

Model Saved


In [None]:
# Load the saved network back. compile=False skips restoring the training
# configuration (optimizer/loss); the model is only used for predict() below.
model = load_model(filepath=model_path, compile=False)
print('Model Loaded Successfully')

Model Loaded Successfully


In [None]:
def SeqNumber2Datetime(seq_number: int) -> datetime:
    """Decode the timestamp embedded in a sequence number.

    The first 12 characters of ``str(seq_number)`` are parsed as
    ``YYYYmmddHHMM``; anything after them is ignored.

    Raises ``ValueError`` (from ``strptime``) if those characters do not form
    a valid timestamp.
    """
    timestamp_digits = str(seq_number)[:12]
    return datetime.strptime(timestamp_digits, "%Y%m%d%H%M")

def Datetime2SeqNumber(dt: datetime, device_id: int) -> int:
    """Encode a timestamp and device number as an integer sequence number.

    The result is ``YYYYmmddHHMM`` followed by ``device_id`` zero-padded to
    two digits, e.g. ``(2024-01-02 09:30, 4) -> 20240102093004``.

    Returns an ``int``, as the annotation promises. The previous
    implementation returned the digit string itself, which never compares
    equal to an integer sequence number.
    """
    timestamp_digits = dt.strftime("%Y%m%d%H%M")
    return int(f"{timestamp_digits}{device_id:02}")

In [None]:
# Reload the merged log; unlike the training load, no rows are dropped here.
SourceData = pd.read_csv(f"..//Data//MergedSorted//{device}_Merged_Sorted.csv")
PredictedData = SourceData.copy()

# Keep only the test rows whose 序號 ends in this device's number.
TestData = pd.read_csv('..//Data/TestData//upload(no answer).csv')
device_number = int(device[1:])
TestData = TestData[TestData['序號'] % 100 == device_number]

to_predict_sequmber = TestData['序號'].to_list()

# Row labels in PredictedData of the rows that have to be predicted.
is_requested = PredictedData['SeqNumber'].isin(to_predict_sequmber)
indices_1 = PredictedData.index[is_requested].to_list()
len(indices_1)

288

In [None]:
# Label range to inspect: from one look-back window before the first
# requested row through the last requested row.
index_min = min(indices_1) - n_timesteps
index_max = max(indices_1)

range_features = PredictedData.loc[index_min:index_max, to_predict_features_model_1]
indices_2 = range_features.index.tolist()

# Rows in that range with at least one missing feature value.
rows_with_na = range_features.isnull().any(axis=1)
rows_with_na_data = range_features[rows_with_na]

# Labels of those incomplete rows.
indices_with_na = rows_with_na_data.index.to_list()
len(indices_with_na)

1443

In [None]:
 # 如果 LookBackNum > 12 選 indices_with_na
 # 其餘選 indices_1
# With a look-back longer than 12 rows, every incomplete row in the range is
# predicted; otherwise only the requested rows are.
indices_to_use = indices_with_na if LookBackNum > 12 else indices_1

# Scale the feature table ONCE, up front, and feed the model from this scaled
# copy. The model's outputs are already in scaled units, so they go into the
# copy as-is. Previously each window was re-read from PredictedData and run
# through the scaler again, so any earlier prediction inside the window was
# scaled a second time. MinMaxScaler keeps NaN as NaN in transform, so rows
# not yet predicted simply stay NaN here.
scaled_features = pd.DataFrame(
    X_preprocess_pipe.transform(PredictedData[to_predict_features_model_1].values),
    index=PredictedData.index,
    columns=to_predict_features_model_1,
)

for index in indices_to_use:
    # .loc slicing is label-inclusive: LookBackNum rows ending just before `index`.
    window = scaled_features.loc[index - LookBackNum : index - 1].values
    if window.shape[0] != n_timesteps:
        raise ValueError(
            f"row {index}: expected {n_timesteps} look-back rows, found {window.shape[0]}"
        )

    X = np.reshape(window, (1, n_timesteps, n_features))
    # verbose=0: one progress bar per row would flood the cell output.
    pred = model.predict(X, verbose=0)[0]

    # Make the prediction available to later windows (scaled copy) and record
    # it in PredictedData in the same scaled units as before.
    scaled_features.loc[index] = pred
    PredictedData.loc[index, to_predict_features_model_1] = pred



In [None]:
# Export the requested rows only: sequence number plus the model-1 features.
export_columns = ['SeqNumber'] + to_predict_features_model_1
PredictedData.loc[indices_1, export_columns].to_csv(f'..//Data//PredictedData//Predicted_{device}.csv', index=False)

In [None]:
# Training table for the power regression: complete rows of the merged log.
TrainData = pd.read_csv(f'..//Data//MergedSorted//{device}_Merged_Sorted.csv')

# drop=True prevents the old row labels from being inserted as a stray
# "index" column, which a plain reset_index() would do.
TrainData = TrainData.dropna().reset_index(drop=True)

# The regression reads the same columns the LSTM predicts ...
input_features = to_predict_features_model_1

# ... and estimates the average power from them.
to_predict_features = [
    'Avg_Power(mW)',
]

In [None]:
# Train the regression model.
# Inputs are min-max scaled with X_preprocess_pipe, which is refit here on
# the training rows; the target column is left in its original units.
X_train = X_preprocess_pipe.fit_transform(TrainData[to_predict_features_model_1].values)
y_train = TrainData[to_predict_features].values

# Degree-2 polynomial expansion followed by ordinary least squares.
reg_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())

# 10-fold cross-validation using the estimator's default score.
cv_scores = cross_val_score(reg_model, X_train, y_train, cv=10)
cv_scores

array([0.99133079, 0.99230525, 0.99472568, 0.99702669, 0.99611572,
       0.98826258, 0.99526574, 0.99758318, 0.99721775, 0.99795973])

In [None]:
# Final fit on all training rows (cross_val_score above does not fit reg_model itself).
reg_model.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# In-sample check: predict the training rows themselves and clip negative
# outputs to zero before scoring.
y_pred = np.clip(reg_model.predict(X_train), 0, None)

for label, metric in (
    ('MSE: ', mean_squared_error),
    ('MAE: ', mean_absolute_error),
    ('R2:', r2_score),
):
    print(label, metric(y_train, y_pred))

MSE:  975.1119057673792
MAE:  14.35080084332298
R2: 0.9966199324499508


In [None]:
# Predict generated power for the exported rows and write it back to the same file.
predicted_path = f'..//Data//PredictedData//Predicted_{device}.csv'
PredictedData = pd.read_csv(predicted_path)

# Feature values are used exactly as stored in the CSV, with no scaling step here.
# NOTE(review): this presumes the stored values are already in the units the
# regression was trained on - confirm against the cell that wrote the file.
X_Test = PredictedData[to_predict_features_model_1].values

# Clip negative outputs to zero.
y_pred = np.clip(reg_model.predict(X_Test), 0, None)

PredictedData[to_predict_features] = y_pred
PredictedData.to_csv(predicted_path, index=False)