訓練模型

In [1]:
#%%
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM,BatchNormalization, Bidirectional

from keras.models import load_model

import joblib
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import os

# Load the training data for the selected device, drop every row that has a
# missing value, and renumber the remaining rows (the previous row labels are
# kept as an extra "index" column by reset_index()).
device = 'L1'
SourceData = (
    pd.read_csv(f"..//Data//MergedSorted//{device}_Merged_Sorted.csv")
    .dropna()
    .reset_index()
)

In [2]:
# Input columns of model 2.
input_features_model_2 = [
    'Avg_Temperature(°C)',
    'Avg_Humidity(%)',
    'Avg_Sunlight(Lux)',
    'Min_Temperature(°C)',
    'Min_Humidity(%)',
    'Min_Sunlight(Lux)',
    'Min_Power(mW)',
    'Max_Temperature(°C)',
    'Max_Humidity(%)',
    'Max_Sunlight(Lux)',
    'Max_Power(mW)',
]

# Column predicted by model 2.
to_predict_features_model_2 = ['Avg_Power(mW)']

# Model 1 reads and predicts the same columns: each model-2 input column name
# with a 'Std_' prefix, in the same order. Both names refer to one shared list.
input_features_model_1 = to_predict_features_model_1 = [
    f'Std_{column}' for column in input_features_model_2
]

In [3]:
def create_dataset(data, LookBackNum):
    """Build sliding-window samples from a 2-D array.

    For every row index i in range(LookBackNum, len(data)), the sample is the
    LookBackNum rows immediately before row i and the target is row i itself.

    Args:
        data: 2-D array indexed as data[row, column].
        LookBackNum: number of preceding rows in each sample window.

    Returns:
        A tuple (X, y) of NumPy arrays: X stacks the windows and y stacks the
        matching target rows. Both are empty arrays when len(data) is not
        greater than LookBackNum.
    """
    target_rows = range(LookBackNum, len(data))
    windows = [data[row - LookBackNum:row, :] for row in target_rows]
    targets = [data[row, :] for row in target_rows]
    return np.array(windows), np.array(targets)
  
# Number of past rows the LSTM looks at, and the number of rows to forecast.
n_timesteps = LookBackNum = 6  # look-back window length
ForecastNum = 48  # number of rows to forecast

# Regression: select the columns to keep.
# Inputs are the model-2 input columns and the target is the power column.
# (X used to be built from to_predict_features_model_2, i.e. the target column
# itself, which made X identical to y.)
Regression_X_train = SourceData[input_features_model_2].values
Regression_y_train = SourceData[['Avg_Power(mW)']].values


# LSTM: select the columns to keep.
AllOutPut = SourceData[to_predict_features_model_1].values
n_features = len(to_predict_features_model_1)


# Standardization: fit a scaler on the LSTM columns.
LSTM_StandardModel = StandardScaler().fit(AllOutPut)
AllOutPut_Scaled = LSTM_StandardModel.transform(AllOutPut)
# NOTE(review): the training windows below are built from the raw AllOutPut,
# not from AllOutPut_Scaled, so the scaled array is unused in this cell.
# Switching to the scaled data would also require scaling the model inputs
# (and inverse-transforming its outputs) wherever the model is used for
# prediction — confirm the intent before changing it.
X_train, y_train = create_dataset(AllOutPut, LookBackNum=LookBackNum)


# Reshape to (samples, timesteps, features): samples is the number of training
# windows, timesteps the window length, features the number of columns per row.
X_train = np.reshape(X_train, (X_train.shape[0], n_timesteps, n_features))

In [4]:
#%%
# ============================ Build the LSTM model ============================
# Layer stack, in order: bidirectional LSTM that returns the full sequence,
# dropout, a second LSTM, batch normalization, dropout, and a dense layer with
# one unit per feature.
regressor = Sequential([
    Bidirectional(
        LSTM(units=128, return_sequences=True, activation='relu'),
        input_shape=(n_timesteps, n_features),
    ),
    Dropout(0.2),
    LSTM(units=64, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(units=n_features),
])

# Adam optimizer with mean-squared-error loss.
regressor.compile(optimizer='adam', loss='mse')
regressor.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 6, 256)           143360    
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 6, 256)            0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                82176     
                                                                 
 batch_normalization (BatchN  (None, 64)               256       
 ormalization)                                                   
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 11)                7

In [5]:
# Start training on the windowed samples.
regressor.fit(
    x=X_train,
    y=y_train,
    epochs=300,
    batch_size=64,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
 13/148 [=>............................] - ETA: 4s - loss: 492782944.0000

KeyboardInterrupt: 

In [None]:
# Save the trained model. The target directory is created first so the save
# does not fail when it does not exist yet.
os.makedirs('..//Model', exist_ok=True)
regressor.save(f'..//Model//WheatherLSTM_{device}.h5')
print('Model Saved')

Model Saved


In [None]:
# Load the saved model for the selected device back from disk.
model_path = f'..//Model//WheatherLSTM_{device}.h5'
regressor = load_model(model_path)
print('Model Loaded Successfully')

Model Loaded Successfully


In [None]:
def SeqNumber2Datetime(seq_number: int) -> datetime:
    """Convert a sequence number to a datetime.

    The first 12 characters of the sequence number's string form are parsed
    with the format "%Y%m%d%H%M"; any remaining characters are ignored.

    Raises:
        ValueError: if those characters do not match the format.
    """
    timestamp_digits = str(seq_number)[:12]
    return datetime.strptime(timestamp_digits, "%Y%m%d%H%M")

def Datetime2SeqNumber(dt: datetime, device_id) -> str:
    """Build a sequence number string from a datetime and a device id.

    The result is the datetime formatted as "%Y%m%d%H%M" followed by
    device_id rendered with the ":02" format spec (an integer id is
    zero-padded to at least two digits).

    The value is returned as a string; the return annotation previously said
    ``int``, which did not match what the function returns.

    Args:
        dt: the timestamp to encode.
        device_id: the device identifier appended after the timestamp.

    Returns:
        The sequence number as a string, e.g. "20240101093001".
    """
    return f"{dt:%Y%m%d%H%M}{device_id:02}"

In [None]:
# Reload the full file (including rows with missing values) and collect the
# positions of every row that has at least one missing value and is preceded
# by at least LookBackNum rows.
SourceData = pd.read_csv(f"..//Data//MergedSorted//{device}_Merged_Sorted.csv")
rows_with_missing = SourceData.isna().any(axis=1)
target_positions = [
    position
    for position, has_missing in enumerate(rows_with_missing)
    if has_missing and position >= LookBackNum
]

In [None]:
# Fill each target row in order. Rows are written back into SourceData one at
# a time, so a later window can include values predicted for an earlier row.
# The window values are passed to the model exactly as stored in SourceData
# (no scaler is applied in this cell).
for target_position in target_positions:
    window_start = target_position - LookBackNum
    window = SourceData[to_predict_features_model_1].iloc[window_start:target_position, :]

    # Single-sample batch shaped (1, n_timesteps, n_features).
    X_test = np.reshape(np.array([window]), (1, n_timesteps, n_features))

    prediction = regressor.predict(X_test)[0]

    for feature, predicted_value in zip(to_predict_features_model_1, prediction):
        SourceData.at[target_position, feature] = predicted_value



In [None]:
# Export the date/sequence columns together with the model-1 feature columns.
# The output directory is created first so the export does not fail when it
# does not exist yet.
output_columns = ['DateTime', 'SeqNumber'] + to_predict_features_model_1
os.makedirs('..//Data//PredictedData', exist_ok=True)
SourceData[output_columns].to_csv(f'..//Data//PredictedData//Predicted_{device}.csv', index=False)