訓練模型

In [178]:
#%%
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.models import load_model

import joblib
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import os

#載入訓練資料
device = 'L1'
SourceData = pd.read_csv(f"..//Data//MergedSorted//{device}_Merged_Sorted.csv")
SourceData.dropna(inplace=True)
SourceData.reset_index(inplace=True)

In [179]:
input_features_model_1 = to_predict_features_model_1 = input_features_model_2 = [
    'Avg_Temperature(°C)',
    'Avg_Humidity(%)',
    'Avg_Sunlight(Lux)',
    'Avg_Power(mW)',
    'Min_Temperature(°C)',
    'Min_Humidity(%)',
    'Min_Sunlight(Lux)',
    'Min_Power(mW)',
    'Max_Temperature(°C)',
    'Max_Humidity(%)',
    'Max_Sunlight(Lux)',
    'Max_Power(mW)'
]


to_predict_features_model_2 = [
    'Avg_Power(mW)',
    ]

In [180]:
def create_dataset(data, LookBackNum):
    X = []
    y = []

    #設定每i-12筆資料(X_train)就對應到第i筆資料(y_train)
    for i in range(LookBackNum,len(data)):
        X.append(data[i-LookBackNum:i, :])
        y.append(data[i, :])

    return np.array(X), np.array(y)
  
#設定LSTM往前看的筆數和預測筆數
n_timesteps = LookBackNum = 6 #LSTM往前看的筆數
ForecastNum = 48 #預測筆數

#迴歸分析 選擇要留下來的資料欄位
#(發電量)
Regression_X_train = SourceData[to_predict_features_model_2].values
Regression_y_train = SourceData[['Avg_Power(mW)']].values


#LSTM 選擇要留下來的資料欄位
AllOutPut = SourceData[to_predict_features_model_1].values
n_features = len(to_predict_features_model_1)


#正規化
LSTM_StandardModel = StandardScaler().fit(AllOutPut)
AllOutPut_Scaled = LSTM_StandardModel.transform(AllOutPut)
X_train, y_train = create_dataset(AllOutPut_Scaled, LookBackNum=LookBackNum)


# Reshaping
#(samples 是訓練樣本數量,timesteps 是每個樣本的時間步長,features 是每個時間步的特徵數量)
X_train = np.reshape(X_train,(X_train.shape[0], n_timesteps, n_features))

In [193]:
#%%
#============================建置&訓練「LSTM模型」============================
#建置LSTM模型

regressor = Sequential ()

regressor.add(LSTM(
    units = 100, 
    return_sequences = True, 
    input_shape = (n_timesteps, n_features),
    activation='relu'
    ))

regressor.add(LSTM(units = 50, activation='relu'))


# output layer
regressor.add(Dense(units = n_features))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
regressor.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_35 (LSTM)              (None, 6, 100)            45200     
                                                                 
 lstm_36 (LSTM)              (None, 50)                30200     
                                                                 
 dense_14 (Dense)            (None, 12)                612       
                                                                 
Total params: 76,012
Trainable params: 76,012
Non-trainable params: 0
_________________________________________________________________


In [194]:
#開始訓練
regressor.fit(X_train, y_train, epochs = 100, batch_size = 128)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100

In [None]:
#保存模型
regressor.save(f'..//Model//WheatherLSTM_{device}.h5')
print('Model Saved')

Model Saved


In [None]:
#載入模型
regressor = load_model(f'..//Model//WheatherLSTM_{device}.h5')
print('Model Loaded Successfully')

Model Loaded Successfully


In [None]:
def SeqNumber2Datetime(seq_number: int) -> datetime:
    seq_number = str(seq_number)
    datetime_str = seq_number[:12]
    return datetime.strptime(datetime_str, "%Y%m%d%H%M") 

def Datetime2SeqNumber(dt: datetime, device_id) -> int:
    base_seq_number = dt.strftime("%Y%m%d%H%M")
    seq_number_with_device = f"{base_seq_number}{device_id:02}"
    return seq_number_with_device

In [None]:
SourceData = pd.read_csv(f"..//Data//MergedSorted//{device}_Merged_Sorted.csv")
SourceData

Unnamed: 0,DateTime,SeqNumber,Device_ID,Year,Month,Day,Hour,Minute,Avg_WindSpeed(m/s),Avg_Pressure(hpa),...,Min_Temperature(°C),Min_Humidity(%),Min_Sunlight(Lux),Min_Power(mW),Max_WindSpeed(m/s),Max_Pressure(hpa),Max_Temperature(°C),Max_Humidity(%),Max_Sunlight(Lux),Max_Power(mW)
0,2024-01-01 06:30:00,20240101063001,1.0,2024.0,1.0,1.0,6.0,30.0,0.0,1016.54,...,17.5,86.7,25.00,0.00,0.0,1016.59,17.5,86.9,86.67,0.01
1,2024-01-01 06:40:00,20240101064001,1.0,2024.0,1.0,1.0,6.0,40.0,0.0,1016.59,...,17.4,86.7,100.83,0.00,0.0,1016.65,17.5,86.9,235.00,0.03
2,2024-01-01 06:50:00,20240101065001,1.0,2024.0,1.0,1.0,6.0,50.0,0.0,1016.66,...,17.5,86.9,251.67,0.03,0.0,1016.70,17.5,87.1,400.00,0.07
3,2024-01-01 07:00:00,20240101070001,1.0,2024.0,1.0,1.0,7.0,0.0,0.0,1016.73,...,17.5,86.9,415.83,0.07,0.0,1016.76,17.5,87.0,537.50,0.10
4,2024-01-01 07:10:00,20240101071001,1.0,2024.0,1.0,1.0,7.0,10.0,0.0,1016.77,...,17.5,86.5,570.83,0.10,0.0,1016.85,17.6,87.2,784.17,0.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23439,2024-11-30 16:20:00,20241130162001,,,,,,,,,...,,,,,,,,,,
23440,2024-11-30 16:30:00,20241130163001,,,,,,,,,...,,,,,,,,,,
23441,2024-11-30 16:40:00,20241130164001,,,,,,,,,...,,,,,,,,,,
23442,2024-11-30 16:50:00,20241130165001,,,,,,,,,...,,,,,,,,,,


In [None]:
target_positions = []
for timestamp, row in SourceData.iterrows():
    if row.isna().any():
        target_position = SourceData.index.get_loc(timestamp)
        if target_position >= LookBackNum :
            target_positions.append(target_position)

In [None]:
len(target_positions)

14003

In [None]:
target_position = 66

X = SourceData[to_predict_features_model_1].iloc[target_position-LookBackNum:target_position, :]

X = np.array(X)

X = np.reshape(X, (1, n_timesteps, n_features))

prediction = regressor.predict(X)

prediction = prediction[0]
    
for idx, feature in enumerate(to_predict_features_model_1):
    SourceData.at[target_position, feature] = prediction[idx]



array([0.6308283 , 0.90529966, 1.8826288 , 1.2124431 , 0.6248805 ,
       0.92924386, 1.9716998 , 1.2973107 , 0.6292273 , 0.8731189 ,
       1.8731776 , 1.2144318 ], dtype=float32)

In [None]:
SourceData.reindex(SourceData['SeqNumber'])
SourceData[to_predict_features_model_1].to_csv(f'..//Data//PredictedData//Predicted_{device}.csv')

In [None]:
SourceData = pd.read_csv(f"..//Data//MergedSorted//{device}_Merged_Sorted.csv")
PredictedData = pd.read_csv(f'..//Data//PredictedData//Predicted_{device}.csv')

X_train = PredictedData[input_features_model_2].to_numpy()
y_train = SourceData[to_predict_features_model_2].to_numpy()

model_2 = make_pipeline(
    StandardScaler(),
    PCA(n_components=8),
    PolynomialFeatures(degree=4),
    LinearRegression()
)