訓練模型

In [112]:
#%%
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.models import load_model

import joblib
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import os

# Load the training data for the selected device, discard every row that has
# a missing value, and renumber the rows (the previous row labels are kept in
# a new 'index' column by reset_index()).
device = 'L1'
SourceData = (
    pd.read_csv(f"..//Data//MergedSorted//{device}_Merged_Sorted.csv")
    .dropna()
    .reset_index()
)

In [113]:
# Column sets used by the two models.
#
# Model 1 reads and predicts the same weather columns; model 2 reads those
# weather columns and predicts power. Each name is bound to its OWN list so
# that editing one model's feature set cannot silently change the others
# (a chained assignment would make all three names share one list object).
input_features_model_1 = [
    'Avg_Temperature(°C)',
    'Avg_Humidity(%)',
    'Avg_Sunlight(Lux)',

    # 'Min_Temperature(°C)',
    # 'Min_Humidity(%)',
    # 'Min_Sunlight(Lux)',

    # 'Max_Temperature(°C)',
    # 'Max_Humidity(%)',
    # 'Max_Sunlight(Lux)',
]
to_predict_features_model_1 = list(input_features_model_1)
input_features_model_2 = list(input_features_model_1)


# Column predicted by model 2.
to_predict_features_model_2 = [
    'Avg_Power(mW)',
    ]

In [130]:
def create_dataset(data, LookBackNum):
    """Cut a 2-D array into sliding look-back windows and next-row targets.

    For every row position ``i`` from ``LookBackNum`` to ``len(data) - 1``
    the window is rows ``[i - LookBackNum, i)`` and the target is row ``i``.

    Args:
        data: 2-D array indexed as ``data[rows, columns]``.
        LookBackNum: number of consecutive rows in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the collected windows
        and targets, in row order.
    """
    target_rows = range(LookBackNum, len(data))
    windows = [data[end - LookBackNum:end, :] for end in target_rows]
    targets = [data[end, :] for end in target_rows]
    return np.array(windows), np.array(targets)
  
# LSTM window settings: how many past rows are fed in, and how many rows are forecast.
LookBackNum = 6            # number of past rows the LSTM looks back on
n_timesteps = LookBackNum  # same value, under the name used for the model input shape
ForecastNum = 48           # number of rows to forecast

# Regression model (model 2): select the columns to keep.
# Inputs are the model-2 input features and the target is the power column.
# The inputs must NOT be taken from to_predict_features_model_2, otherwise X
# would be the very column being predicted (X identical to y).
Regression_X_train = SourceData[input_features_model_2].values
Regression_y_train = SourceData[to_predict_features_model_2].values


# LSTM (model 1): keep only the columns the network works with.
n_features = len(to_predict_features_model_1)
AllOutPut = SourceData[to_predict_features_model_1].values


# Standardise the columns; the fitted scaler is kept in LSTM_StandardModel.
LSTM_StandardModel = StandardScaler()
LSTM_StandardModel.fit(AllOutPut)
AllOutPut_Scaled = LSTM_StandardModel.transform(AllOutPut)

# Build the look-back windows (X_train) and the row following each window (y_train).
X_train, y_train = create_dataset(AllOutPut_Scaled, LookBackNum)


# Reshape to (samples, timesteps, features):
# samples = number of training windows, timesteps = rows per window,
# features = number of columns per row.
X_train = X_train.reshape(X_train.shape[0], n_timesteps, n_features)

In [None]:
#%%
#============================ Build & train the "LSTM model" ============================
# Network: two stacked LSTM layers followed by a dense output layer that emits
# one value per feature.
# NOTE(review): both LSTM layers use activation='relu' instead of the Keras
# default — confirm this is intentional.

regressor = Sequential([
    # First LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(
        units=200,
        activation='relu',
        return_sequences=True,
        input_shape=(n_timesteps, n_features),
    ),
    # Second LSTM returns only its final output.
    LSTM(
        units=128,
        activation='relu',
        return_sequences=False,
    ),
    # Output layer
    Dense(units=n_features),
])

regressor.compile(loss='mean_squared_error', optimizer='adam')
regressor.summary()

Model: "sequential_26"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_56 (LSTM)              (None, 6, 200)            163200    
                                                                 
 lstm_57 (LSTM)              (None, 128)               168448    
                                                                 
 dense_31 (Dense)            (None, 3)                 387       
                                                                 
Total params: 332,035
Trainable params: 332,035
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Start training: 100 epochs with mini-batches of 128 windows.
regressor.fit(x=X_train, y=y_train, batch_size=128, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100

In [None]:
# Save the trained model to an HDF5 file named after the device.
saved_model_path = f'..//Model//WheatherLSTM_{device}.h5'
regressor.save(saved_model_path)
print('Model Saved')

Model Saved


In [None]:
# Load the model back from the per-device HDF5 file.
saved_model_path = f'..//Model//WheatherLSTM_{device}.h5'
regressor = load_model(saved_model_path)
print('Model Loaded Successfully')

Model Loaded Successfully


In [None]:
def SeqNumber2Datetime(seq_number: int) -> datetime:
    """Return the datetime encoded in the leading digits of a sequence number.

    The value is converted to a string and its first 12 characters are parsed
    as ``YYYYmmddHHMM``; any characters after those are ignored.

    Raises:
        ValueError: if the first 12 characters do not match ``%Y%m%d%H%M``.
    """
    digits = str(seq_number)
    return datetime.strptime(digits[:12], "%Y%m%d%H%M")

def Datetime2SeqNumber(dt: datetime, device_id) -> str:
    """Build a sequence-number string from a datetime and a device id.

    The result is ``YYYYmmddHHMM`` followed by the device id padded to at
    least two characters (e.g. device 1 at 2024-01-01 06:30 gives
    ``"20240101063001"``).

    Args:
        dt: timestamp to encode (formatted down to the minute).
        device_id: device number. Whole-number floats such as ``1.0`` are
            accepted and treated as the corresponding integer.

    Returns:
        The sequence number as a string.
    """
    # A float id (e.g. 1.0, as produced when a numeric CSV column contains
    # NaNs) would otherwise be formatted as "1.0" and corrupt the number.
    if isinstance(device_id, float) and device_id.is_integer():
        device_id = int(device_id)

    base_seq_number = dt.strftime("%Y%m%d%H%M")
    return f"{base_seq_number}{device_id:02}"

In [None]:
# Read the merged file for the device again; no rows are dropped here, so rows
# with missing values remain in SourceData.
merged_csv_path = f"..//Data//MergedSorted//{device}_Merged_Sorted.csv"
SourceData = pd.read_csv(merged_csv_path)
SourceData

Unnamed: 0,DateTime,SeqNumber,Device_ID,Year,Month,Day,Hour,Minute,Avg_WindSpeed(m/s),Avg_Pressure(hpa),...,Std_Min_Temperature(°C),Std_Min_Humidity(%),Std_Min_Sunlight(Lux),Std_Min_Power(mW),Std_Max_WindSpeed(m/s),Std_Max_Pressure(hpa),Std_Max_Temperature(°C),Std_Max_Humidity(%),Std_Max_Sunlight(Lux),Std_Max_Power(mW)
0,2024-01-01 06:30:00,20240101063001,1.0,2024.0,1.0,1.0,6.0,30.0,0.0,1016.54,...,-1.277347,0.546684,-0.844341,-0.526245,-0.499413,0.022279,-1.308157,0.498119,-0.895384,-0.632017
1,2024-01-01 06:40:00,20240101064001,1.0,2024.0,1.0,1.0,6.0,40.0,0.0,1016.59,...,-1.287759,0.546684,-0.841386,-0.526245,-0.499413,0.022629,-1.308157,0.498119,-0.890886,-0.631981
2,2024-01-01 06:50:00,20240101065001,1.0,2024.0,1.0,1.0,6.0,50.0,0.0,1016.66,...,-1.277347,0.554346,-0.835507,-0.526170,-0.499413,0.022920,-1.308157,0.505943,-0.885882,-0.631909
3,2024-01-01 07:00:00,20240101070001,1.0,2024.0,1.0,1.0,7.0,0.0,0.0,1016.73,...,-1.277347,0.554346,-0.829109,-0.526071,-0.499413,0.023270,-1.308157,0.502031,-0.881711,-0.631855
4,2024-01-01 07:10:00,20240101071001,1.0,2024.0,1.0,1.0,7.0,10.0,0.0,1016.77,...,-1.277347,0.539022,-0.823068,-0.525997,-0.499413,0.023794,-1.298099,0.509854,-0.874230,-0.631693
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23439,2024-11-30 16:20:00,20241130162001,,,,,,,,,...,,,,,,,,,,
23440,2024-11-30 16:30:00,20241130163001,,,,,,,,,...,,,,,,,,,,
23441,2024-11-30 16:40:00,20241130164001,,,,,,,,,...,,,,,,,,,,
23442,2024-11-30 16:50:00,20241130165001,,,,,,,,,...,,,,,,,,,,


In [None]:
# Positions (0-based row numbers) of every row that contains at least one
# missing value and is preceded by at least LookBackNum rows.
# Computed with a vectorized NaN mask instead of iterating row by row, which
# is much faster and does not depend on the index labels being unique.
rows_with_missing = SourceData.isna().any(axis=1).to_numpy()
missing_positions = np.flatnonzero(rows_with_missing)
target_positions = missing_positions[missing_positions >= LookBackNum].tolist()

In [None]:
# Show how many row positions were collected in target_positions.
len(target_positions)

14003

In [None]:
# Work on a copy so SourceData itself is left untouched.
PredictedData = SourceData.copy()

# Read the upload template and keep only the sequence numbers whose last digit is 1.
# NOTE(review): presumably this selects the current device's rows — confirm
# that the last-digit test matches `device`.
TestData = pd.read_csv('..//Data/TestData//upload(no answer).csv')
ends_with_one = TestData['序號'].mod(10).eq(1)
TestData = TestData[ends_with_one]
to_predict_sequmber = TestData['序號'].to_list()

# Index labels of the PredictedData rows whose SeqNumber was requested.
requested_rows = PredictedData['SeqNumber'].isin(to_predict_sequmber)
indices = PredictedData.loc[requested_rows, to_predict_features_model_1].index.to_list()

In [None]:

for index in indices:
    X = PredictedData.loc[index-LookBackNum:index-1][to_predict_features_model_1]