In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler 
from scipy import stats
from scipy.stats import norm,skew
import warnings
from sklearn.preprocessing import RobustScaler
warnings.filterwarnings("ignore")
%matplotlib inline
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential  
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import r2_score

# 讀取數據
data_path = r'C:\Users\rex\Desktop\水位預測\3\1510H057.csv'   # 檔案位置(可更改位置)
data = pd.read_csv(data_path)
# 定義 時間索引 欄位
def create_timestamp(data):
    # 將"Date"和"Time"合併成時間戳記並設置為索引
    data['Timestamp'] = pd.to_datetime(data['Date'] + ' ' + data['Time'])
    data = data.set_index('Timestamp')
    return data

# 原始資料加入時間索引
data = create_timestamp(data)
print (data)
data.head(5)



                           Date   Time  1510H057 Water Level  19335 Rainfall
Timestamp                                                                   
2020-01-01 00:00:00    2020/1/1  00:00                 88.47        0.000000
2020-01-01 01:00:00    2020/1/1  01:00                 88.79        0.000000
2020-01-01 02:00:00    2020/1/1  02:00                 88.78        0.000000
2020-01-01 03:00:00    2020/1/1  03:00                 88.84        0.000000
2020-01-01 04:00:00    2020/1/1  04:00                 88.81        1.428012
...                         ...    ...                   ...             ...
2022-12-31 19:00:00  2022/12/31  19:00                 88.83        0.000000
2022-12-31 20:00:00  2022/12/31  20:00                 88.82        0.000000
2022-12-31 21:00:00  2022/12/31  21:00                 88.80        0.055771
2022-12-31 22:00:00  2022/12/31  22:00                 88.81        0.000000
2022-12-31 23:00:00  2022/12/31  23:00                 88.81        0.014507

Unnamed: 0_level_0,Date,Time,1510H057 Water Level,19335 Rainfall
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01 00:00:00,2020/1/1,00:00,88.47,0.0
2020-01-01 01:00:00,2020/1/1,01:00,88.79,0.0
2020-01-01 02:00:00,2020/1/1,02:00,88.78,0.0
2020-01-01 03:00:00,2020/1/1,03:00,88.84,0.0
2020-01-01 04:00:00,2020/1/1,04:00,88.81,1.428012


In [2]:

# 水位四捨五入小數第一位
data['1510H057 Water Level'] = data['1510H057 Water Level'].round(1)

# 列出所有要增加特徵的欄位名稱
columns_to_expand = [
    # "1510H084 Water Level",
    # "1510H076 Water Level",
    # "1510H071 Water Level",
    "1510H057 Water Level",
    # "19774 Rainfall"
    # "20016 Rainfall"
    # "19606 Rainfall"
    "19335 Rainfall"
]

# 1.產出_T-1到_T-48的特徵
for col in columns_to_expand:
    for t in range(1, 73):
        new_col_name = f"{col}_T-{t}"
        data[new_col_name] = data[col].shift(t)
# --------------------------------------------------------------------------------------------------------        
# 2.產出過去某段時間(48hr) 的平均水位和標準差
# 建立滑動窗口的平均水位和標準差
window_size = 48 # 48小時

# for station in ["1510H084", "1510H076", "1510H071", "1510H057"]:
for station in ["1510H057"]:    
    col = f"{station} Water Level"
    data[f"{col}_rolling_mean"] = data[col].rolling(window_size).mean()     #過去48小時平均水位
    data[f"{col}_rolling_std"] = data[col].rolling(window_size).std()       #過去48小時水位標準差

# -------------------------------------------------------------------------------------------------------------
# data.to_excel(r'C:\Users\rex\Desktop\水位預測\2\總資料新增特徵.xlsx', index=False)

# 對每個站點的水位差分
# for station in ["1510H084", "1510H076", "1510H071", "1510H057"]:

# for station in ["1510H057"]:    
#     col = f"{station} Water Level"
#     data[f"{col}_diff"] = data[col].diff()                                  #前一小時水位差

# 3. 產出每個月的平均水位
monthly_mean = data.groupby(data.index.month)[f"1510H057 Water Level"].mean()

#  創建一個新列，其中包含每個時間戳記與其對應月份的平均水位
data[f"1510H057 Water Level_monthly_mean"] = data.index.month.map(monthly_mean)
# -------------------------------------------------------------------------------------------------------------------
# 4. 計算水位距平值：當前水位與月平均水位的差值
data[f"1510H057 Water Level_distance"] = data[f"1510H057 Water Level"] - data[f"1510H057 Water Level_monthly_mean"]
# -------------------------------------------------------------------------------------------------------------------
# 5.計算3、6、9、12、24、48、72小時的累積雨量
time_intervals = [3, 6, 9, 12, 24, 48, 72]
# -------------------------------------------------------------------------------------------------------------------
for interval in time_intervals:
    col_name = f"19335 Rainfall_{interval}h_cumsum"
    data[col_name] = data["19335 Rainfall"].rolling(window=interval).sum()

print(data)
# -------------------------------------------------------------------------------------------------------------------
# 6.增加T+1到T+288的雨量
for station in [
    # "19774 Rainfall"
    # "20016 Rainfall"
    "19335 Rainfall"
    # "19606 Rainfall"
    ]:
    col = f"{station}"
    for t in range(1, 289):
        future_col_name = f"{col}_T+{t}"
        data[future_col_name] = data[col].shift(-t)

# 刪除包含NaN的行
data = data.dropna()
# 刪除不必要的列
data = data.drop(['Date', 'Time'], axis=1)  

data

                           Date   Time  1510H057 Water Level  19335 Rainfall  \
Timestamp                                                                      
2020-01-01 00:00:00    2020/1/1  00:00                  88.5        0.000000   
2020-01-01 01:00:00    2020/1/1  01:00                  88.8        0.000000   
2020-01-01 02:00:00    2020/1/1  02:00                  88.8        0.000000   
2020-01-01 03:00:00    2020/1/1  03:00                  88.8        0.000000   
2020-01-01 04:00:00    2020/1/1  04:00                  88.8        1.428012   
...                         ...    ...                   ...             ...   
2022-12-31 19:00:00  2022/12/31  19:00                  88.8        0.000000   
2022-12-31 20:00:00  2022/12/31  20:00                  88.8        0.000000   
2022-12-31 21:00:00  2022/12/31  21:00                  88.8        0.055771   
2022-12-31 22:00:00  2022/12/31  22:00                  88.8        0.000000   
2022-12-31 23:00:00  2022/12/31  23:00  

Unnamed: 0_level_0,1510H057 Water Level,19335 Rainfall,1510H057 Water Level_T-1,1510H057 Water Level_T-2,1510H057 Water Level_T-3,1510H057 Water Level_T-4,1510H057 Water Level_T-5,1510H057 Water Level_T-6,1510H057 Water Level_T-7,1510H057 Water Level_T-8,...,19335 Rainfall_T+279,19335 Rainfall_T+280,19335 Rainfall_T+281,19335 Rainfall_T+282,19335 Rainfall_T+283,19335 Rainfall_T+284,19335 Rainfall_T+285,19335 Rainfall_T+286,19335 Rainfall_T+287,19335 Rainfall_T+288
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-04 00:00:00,88.1,0.0,88.2,88.2,88.2,88.2,88.2,88.2,88.2,88.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
2020-01-04 01:00:00,88.2,0.0,88.1,88.2,88.2,88.2,88.2,88.2,88.2,88.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
2020-01-04 02:00:00,88.2,0.0,88.2,88.1,88.2,88.2,88.2,88.2,88.2,88.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
2020-01-04 03:00:00,88.2,0.0,88.2,88.2,88.1,88.2,88.2,88.2,88.2,88.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
2020-01-04 04:00:00,88.2,0.0,88.2,88.2,88.2,88.1,88.2,88.2,88.2,88.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-19 19:00:00,88.9,0.0,88.9,88.9,88.9,88.9,88.9,88.9,88.9,88.9,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
2022-12-19 20:00:00,88.9,0.0,88.9,88.9,88.9,88.9,88.9,88.9,88.9,88.9,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
2022-12-19 21:00:00,88.9,0.0,88.9,88.9,88.9,88.9,88.9,88.9,88.9,88.9,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.055771
2022-12-19 22:00:00,88.9,0.0,88.9,88.9,88.9,88.9,88.9,88.9,88.9,88.9,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.055771,0.000000


In [None]:
# 特徵輸出

df = pd.DataFrame(data)

df.to_csv('資料.csv', index=True)  # 如果不想保留索引，可以將 index 參數設為 False

In [3]:
# 列出目標水位列
target_columns = [
    "1510H057 Water Level"
    # "1510H071 Water Level"
    # "1510H076 Water Level"
    # "1510H084 Water Level"
]
# 1. 增加目標列：T+1到T+288的目標水位
for target_col in target_columns:
    for t in range(1, 289):
        target = f'{target_col}_target_{t}'
        data[target] = data[target_col].shift(-t)
        
# 移除含有 NaN 的行
data = data.dropna()


In [None]:
data

In [4]:
# 輸出特徵名稱
import pandas as pd

# 建立特征名稱與描述之間的對應
feature_description = {}

# 滾動平均和標準差的描述
for station in ["1510H071"]:    
    col = f"{station} Water Level"
    feature_description[f"{col}_rolling_mean"] = f"{col} 過去48小時的滾動平均"
    feature_description[f"{col}_rolling_std"] = f"{col} 過去48小時的滾動標準差"

# 水位差分描述
for station in ["1510H071"]:    
    col = f"{station} Water Level"
    feature_description[f"{col}_diff"] = f"{col} 與前一小時的水位差分"

# 增加T+1到T+72的雨量描述
for station in ["19606 Rainfall"]:
    col = f"{station}"
    for t in range(1, 289):
        future_col_name = f"{col}_T+{t}"
        feature_description[future_col_name] = f"{col} 在時間點 T+{t} 的預測雨量"

# 目標列描述
for target_col in target_columns:
    for t in range(1, 289):
        target = f'{target_col}_target_{t}'
        feature_description[target] = f"{target_col} 在時間點 T+{t} 的目標水位"

# 轉換成DataFrame並保存為Excel
df_descriptions = pd.DataFrame(list(feature_description.items()), columns=['Feature Name', 'Description'])
df_descriptions.to_excel(r'C:\Users\rex\Desktop\git\data-processing\water_model4\1510H071feature_descriptions.xlsx', index=False)


In [5]:
# 使用 RandomizedSearchCV 選取區間最佳化參數
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from scipy.stats import uniform, randint
import lightgbm as lgb

# 切分數據為訓練和測試集
X = data.drop([f'1510H084 Water Level_target_{i}' for i in range(1, 289)], axis=1)  # 特徵
y = data[f'1510H084 Water Level_target_1']  # 目標變數

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # 切分訓練和測試集

# 定義參數的範圍
param_dist = {
    'objective': ['regression'],  # 回歸問題
    'num_leaves': randint(24, 35),  # 樹的最大葉子數
    'learning_rate': uniform(0.01, 0.03),  # 學習率
    'n_estimators': randint(650, 760),  # 樹的數量
    'max_bin': randint(30, 50),  # 最大bin數量
    'subsample': uniform(0.7, 0.85),  # 子樣本比例
    'subsample_freq': randint(2, 5),  # 子樣本選擇頻率
    'colsample_bytree': uniform(0.7, 0.85),  # 每棵樹使用特徵的比例
    'min_child_samples': randint(4, 7),  # 每個葉子節點上的最小樣本數
    'min_child_weight': randint(10, 13),  # 最小子節點權重和
    'device': ['gpu'],  # 使用GPU進行計算
}

lgb_estimator = lgb.LGBMRegressor(silent=True, verbose=0)  # 定義LightGBM模型

# 隨機搜索參數
rsearch = RandomizedSearchCV(estimator=lgb_estimator,
                             param_distributions=param_dist,
                             n_iter=100,  # 嘗試的參數組合次數
                             cv=5,  # 交叉驗證的折數
                             verbose=1,
                             n_jobs=5,  # 使用的CPU核心數
                             random_state=42)

rsearch.fit(X_train, y_train)  # 在訓練數據上進行隨機搜索

# 輸出最佳參數和分數
print('Best parameters found by random search are', rsearch.best_params_)
print('Best score found by random search is', rsearch.best_score_)


KeyError: "['1510H084 Water Level_target_1', '1510H084 Water Level_target_2', '1510H084 Water Level_target_3', '1510H084 Water Level_target_4', '1510H084 Water Level_target_5', '1510H084 Water Level_target_6', '1510H084 Water Level_target_7', '1510H084 Water Level_target_8', '1510H084 Water Level_target_9', '1510H084 Water Level_target_10', '1510H084 Water Level_target_11', '1510H084 Water Level_target_12', '1510H084 Water Level_target_13', '1510H084 Water Level_target_14', '1510H084 Water Level_target_15', '1510H084 Water Level_target_16', '1510H084 Water Level_target_17', '1510H084 Water Level_target_18', '1510H084 Water Level_target_19', '1510H084 Water Level_target_20', '1510H084 Water Level_target_21', '1510H084 Water Level_target_22', '1510H084 Water Level_target_23', '1510H084 Water Level_target_24', '1510H084 Water Level_target_25', '1510H084 Water Level_target_26', '1510H084 Water Level_target_27', '1510H084 Water Level_target_28', '1510H084 Water Level_target_29', '1510H084 Water Level_target_30', '1510H084 Water Level_target_31', '1510H084 Water Level_target_32', '1510H084 Water Level_target_33', '1510H084 Water Level_target_34', '1510H084 Water Level_target_35', '1510H084 Water Level_target_36', '1510H084 Water Level_target_37', '1510H084 Water Level_target_38', '1510H084 Water Level_target_39', '1510H084 Water Level_target_40', '1510H084 Water Level_target_41', '1510H084 Water Level_target_42', '1510H084 Water Level_target_43', '1510H084 Water Level_target_44', '1510H084 Water Level_target_45', '1510H084 Water Level_target_46', '1510H084 Water Level_target_47', '1510H084 Water Level_target_48', '1510H084 Water Level_target_49', '1510H084 Water Level_target_50', '1510H084 Water Level_target_51', '1510H084 Water Level_target_52', '1510H084 Water Level_target_53', '1510H084 Water Level_target_54', '1510H084 Water Level_target_55', '1510H084 Water Level_target_56', '1510H084 Water Level_target_57', '1510H084 Water Level_target_58', '1510H084 Water Level_target_59', '1510H084 Water Level_target_60', '1510H084 Water Level_target_61', '1510H084 Water Level_target_62', '1510H084 Water Level_target_63', '1510H084 Water Level_target_64', '1510H084 Water Level_target_65', '1510H084 Water Level_target_66', '1510H084 Water Level_target_67', '1510H084 Water Level_target_68', '1510H084 Water Level_target_69', '1510H084 Water Level_target_70', '1510H084 Water Level_target_71', '1510H084 Water Level_target_72', '1510H084 Water Level_target_73', '1510H084 Water Level_target_74', '1510H084 Water Level_target_75', '1510H084 Water Level_target_76', '1510H084 Water Level_target_77', '1510H084 Water Level_target_78', '1510H084 Water Level_target_79', '1510H084 Water Level_target_80', '1510H084 Water Level_target_81', '1510H084 Water Level_target_82', '1510H084 Water Level_target_83', '1510H084 Water Level_target_84', '1510H084 Water Level_target_85', '1510H084 Water Level_target_86', '1510H084 Water Level_target_87', '1510H084 Water Level_target_88', '1510H084 Water Level_target_89', '1510H084 Water Level_target_90', '1510H084 Water Level_target_91', '1510H084 Water Level_target_92', '1510H084 Water Level_target_93', '1510H084 Water Level_target_94', '1510H084 Water Level_target_95', '1510H084 Water Level_target_96', '1510H084 Water Level_target_97', '1510H084 Water Level_target_98', '1510H084 Water Level_target_99', '1510H084 Water Level_target_100', '1510H084 Water Level_target_101', '1510H084 Water Level_target_102', '1510H084 Water Level_target_103', '1510H084 Water Level_target_104', '1510H084 Water Level_target_105', '1510H084 Water Level_target_106', '1510H084 Water Level_target_107', '1510H084 Water Level_target_108', '1510H084 Water Level_target_109', '1510H084 Water Level_target_110', '1510H084 Water Level_target_111', '1510H084 Water Level_target_112', '1510H084 Water Level_target_113', '1510H084 Water Level_target_114', '1510H084 Water Level_target_115', '1510H084 Water Level_target_116', '1510H084 Water Level_target_117', '1510H084 Water Level_target_118', '1510H084 Water Level_target_119', '1510H084 Water Level_target_120', '1510H084 Water Level_target_121', '1510H084 Water Level_target_122', '1510H084 Water Level_target_123', '1510H084 Water Level_target_124', '1510H084 Water Level_target_125', '1510H084 Water Level_target_126', '1510H084 Water Level_target_127', '1510H084 Water Level_target_128', '1510H084 Water Level_target_129', '1510H084 Water Level_target_130', '1510H084 Water Level_target_131', '1510H084 Water Level_target_132', '1510H084 Water Level_target_133', '1510H084 Water Level_target_134', '1510H084 Water Level_target_135', '1510H084 Water Level_target_136', '1510H084 Water Level_target_137', '1510H084 Water Level_target_138', '1510H084 Water Level_target_139', '1510H084 Water Level_target_140', '1510H084 Water Level_target_141', '1510H084 Water Level_target_142', '1510H084 Water Level_target_143', '1510H084 Water Level_target_144', '1510H084 Water Level_target_145', '1510H084 Water Level_target_146', '1510H084 Water Level_target_147', '1510H084 Water Level_target_148', '1510H084 Water Level_target_149', '1510H084 Water Level_target_150', '1510H084 Water Level_target_151', '1510H084 Water Level_target_152', '1510H084 Water Level_target_153', '1510H084 Water Level_target_154', '1510H084 Water Level_target_155', '1510H084 Water Level_target_156', '1510H084 Water Level_target_157', '1510H084 Water Level_target_158', '1510H084 Water Level_target_159', '1510H084 Water Level_target_160', '1510H084 Water Level_target_161', '1510H084 Water Level_target_162', '1510H084 Water Level_target_163', '1510H084 Water Level_target_164', '1510H084 Water Level_target_165', '1510H084 Water Level_target_166', '1510H084 Water Level_target_167', '1510H084 Water Level_target_168', '1510H084 Water Level_target_169', '1510H084 Water Level_target_170', '1510H084 Water Level_target_171', '1510H084 Water Level_target_172', '1510H084 Water Level_target_173', '1510H084 Water Level_target_174', '1510H084 Water Level_target_175', '1510H084 Water Level_target_176', '1510H084 Water Level_target_177', '1510H084 Water Level_target_178', '1510H084 Water Level_target_179', '1510H084 Water Level_target_180', '1510H084 Water Level_target_181', '1510H084 Water Level_target_182', '1510H084 Water Level_target_183', '1510H084 Water Level_target_184', '1510H084 Water Level_target_185', '1510H084 Water Level_target_186', '1510H084 Water Level_target_187', '1510H084 Water Level_target_188', '1510H084 Water Level_target_189', '1510H084 Water Level_target_190', '1510H084 Water Level_target_191', '1510H084 Water Level_target_192', '1510H084 Water Level_target_193', '1510H084 Water Level_target_194', '1510H084 Water Level_target_195', '1510H084 Water Level_target_196', '1510H084 Water Level_target_197', '1510H084 Water Level_target_198', '1510H084 Water Level_target_199', '1510H084 Water Level_target_200', '1510H084 Water Level_target_201', '1510H084 Water Level_target_202', '1510H084 Water Level_target_203', '1510H084 Water Level_target_204', '1510H084 Water Level_target_205', '1510H084 Water Level_target_206', '1510H084 Water Level_target_207', '1510H084 Water Level_target_208', '1510H084 Water Level_target_209', '1510H084 Water Level_target_210', '1510H084 Water Level_target_211', '1510H084 Water Level_target_212', '1510H084 Water Level_target_213', '1510H084 Water Level_target_214', '1510H084 Water Level_target_215', '1510H084 Water Level_target_216', '1510H084 Water Level_target_217', '1510H084 Water Level_target_218', '1510H084 Water Level_target_219', '1510H084 Water Level_target_220', '1510H084 Water Level_target_221', '1510H084 Water Level_target_222', '1510H084 Water Level_target_223', '1510H084 Water Level_target_224', '1510H084 Water Level_target_225', '1510H084 Water Level_target_226', '1510H084 Water Level_target_227', '1510H084 Water Level_target_228', '1510H084 Water Level_target_229', '1510H084 Water Level_target_230', '1510H084 Water Level_target_231', '1510H084 Water Level_target_232', '1510H084 Water Level_target_233', '1510H084 Water Level_target_234', '1510H084 Water Level_target_235', '1510H084 Water Level_target_236', '1510H084 Water Level_target_237', '1510H084 Water Level_target_238', '1510H084 Water Level_target_239', '1510H084 Water Level_target_240', '1510H084 Water Level_target_241', '1510H084 Water Level_target_242', '1510H084 Water Level_target_243', '1510H084 Water Level_target_244', '1510H084 Water Level_target_245', '1510H084 Water Level_target_246', '1510H084 Water Level_target_247', '1510H084 Water Level_target_248', '1510H084 Water Level_target_249', '1510H084 Water Level_target_250', '1510H084 Water Level_target_251', '1510H084 Water Level_target_252', '1510H084 Water Level_target_253', '1510H084 Water Level_target_254', '1510H084 Water Level_target_255', '1510H084 Water Level_target_256', '1510H084 Water Level_target_257', '1510H084 Water Level_target_258', '1510H084 Water Level_target_259', '1510H084 Water Level_target_260', '1510H084 Water Level_target_261', '1510H084 Water Level_target_262', '1510H084 Water Level_target_263', '1510H084 Water Level_target_264', '1510H084 Water Level_target_265', '1510H084 Water Level_target_266', '1510H084 Water Level_target_267', '1510H084 Water Level_target_268', '1510H084 Water Level_target_269', '1510H084 Water Level_target_270', '1510H084 Water Level_target_271', '1510H084 Water Level_target_272', '1510H084 Water Level_target_273', '1510H084 Water Level_target_274', '1510H084 Water Level_target_275', '1510H084 Water Level_target_276', '1510H084 Water Level_target_277', '1510H084 Water Level_target_278', '1510H084 Water Level_target_279', '1510H084 Water Level_target_280', '1510H084 Water Level_target_281', '1510H084 Water Level_target_282', '1510H084 Water Level_target_283', '1510H084 Water Level_target_284', '1510H084 Water Level_target_285', '1510H084 Water Level_target_286', '1510H084 Water Level_target_287', '1510H084 Water Level_target_288'] not found in axis"

In [None]:
# 歸一化數據 X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0)) 最小最大歸一化
scalers = {}
for i in data.columns:
    scaler = MinMaxScaler(feature_range=(0, 1))
    s_s = scaler.fit_transform(data[i].values.reshape(-1,1))
    s_s = np.reshape(s_s, len(s_s))
    scalers['scaler_'+ i] = scaler
    data[i] = s_s
    
# 使用 

In [None]:
#導入
from sklearn.linear_model import ElasticNet, Lasso, BayesianRidge, LassoLarsIC, Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.model_selection import KFold,cross_val_score,train_test_split,cross_val_predict
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from math import sqrt
from tensorflow.keras.layers import TimeDistributed, Flatten
from lightgbm import early_stopping


In [None]:
#定義滑動窗口，窗口由 time_steps 參數控制
def create_dataset(X, y, time_steps=1):         #   X 和 y，分別代表特徵和目標變量。time_steps 參數則表示滑動窗口的大小。
    Xs, ys = [], []                             #   創建兩個空的列表 Xs 和 ys
    for i in range(len(X) - time_steps):        #從第一個時間點開始，一直到可以形成最後一個完整的滑動窗口的位置。
        v = X.iloc[i:(i + time_steps)].values   #選取一個時間窗口內的數據。第 i 時間點開始，包含了接下來的 time_steps 個時間點的數據。
        Xs.append(v)                            #將這個時間窗口的數據添加到 Xs 列表中。
        ys.append(y.iloc[i + time_steps])       #將目標變量在該窗口之後的第一個值添加到 ys 列表中。這個值是與前面時間窗口的 X 對應的 y。
    return np.array(Xs), np.array(ys)           #將 Xs 和 ys 轉換為 numpy 數組並返回。

In [None]:
# MAPE計算函數
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [None]:
import random
from joblib import dump
#隨機種子
seed_value = 42             # 種子值
random.seed(seed_value)     # random 模組設定隨機數生成器的種子
np.random.seed(seed_value)  # NumPy 模組設定隨機數生成器的種子

# 切分數據集，前80%的數據作為訓練集，剩下的20%作為測試集。
train_size = int((len(data) *0.8))
train_data = data[:train_size]
test_data = data[train_size:]

#然後將訓練集中最後的10%作為驗證集。
val_size = int(len(train_data) *0.1)
train_data, val_data = train_data[:-val_size], train_data[-val_size:]

#定義時間步長
time_steps = 48

#副本的創建
train_temp = train_data.copy()
val_temp = val_data.copy()
test_temp = test_data.copy()


In [None]:
import datetime
import numpy as np
from joblib import load
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


specified_timestamp = "2022-08-18 23:00:00"
specified_timestamp_dt = datetime.datetime.strptime(specified_timestamp, '%Y-%m-%d %H:%M:%S')

data_temp = data.loc[specified_timestamp_dt - datetime.timedelta(hours=24): 
                     specified_timestamp_dt + datetime.timedelta(hours=47)]

global_ymin = data[target_columns].min().min()
global_ymax = data[target_columns].max().max()


for target_col in target_columns:
    data_temp_loop = data_temp.copy()  # 這裡創建了data_temp的一個副本，使其在每次迴圈中保持原始狀態

    print(f"Size of data_temp_loop: {len(data_temp_loop)}")

    # 在進行預測之前，將這些數據值轉換回原始尺度
    global_ymin_rescaled = scalers["scaler_" + target_col].inverse_transform([[global_ymin]])[0][0]
    global_ymax_rescaled = scalers["scaler_" + target_col].inverse_transform([[global_ymax]])[0][0]
    
    true_values = []
    predictions = []    

    for t in range(1, 289):

        # 載入模型
        model = load(f"water_model4/model_{target_col}_t{t}.joblib")

        # 如果t不等於1，更新test_temp中的特定列值為上一個時刻的預測值
        if t > 1 and predictions:
            # 將 T+1 的預測值設置為 T+2 的輸入
            data_temp_loop.loc[specified_timestamp_dt, target_col] = predictions[-1]
        
        target = f"{target_col}_target_{t}"

        # 切分測試集
        X_test, y_test = create_dataset(data_temp_loop.drop(columns=[f'{target_col}_target_{i}' for i in range(1, 289)]), data_temp_loop[target], time_steps)
        if len(X_test) == 0:
            print(f"Timestamp: {specified_timestamp}, Target: {target}, X_test is empty!")
            continue  # 如果X_test為空，則跳過後續的代碼並進行下一次迴圈        
        # 重塑 X_test
        X_test = X_test.reshape((X_test.shape[0], -1))
        
        # 進行預測
        y_pred = model.predict(X_test)
        y_pred_rescaled = scalers["scaler_" + target_col].inverse_transform(y_pred.reshape(-1, 1)).flatten()
        y_test_rescaled = scalers["scaler_" + target_col].inverse_transform(y_test.reshape(-1, 1)).flatten()
        
        true_values.append(y_test_rescaled[-1])  # 取最後一個真實值
        predictions.append(y_pred_rescaled[-1])  # 取最後一個預測值
        
        r2 = r2_score(y_test_rescaled, y_pred_rescaled)
        mse = mean_squared_error(y_test_rescaled, y_pred_rescaled)
        mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
        rmse = np.sqrt(mse)
        mape = mean_absolute_percentage_error(y_test_rescaled, y_pred_rescaled)
        
        # timestamp 為測試數據集中的預測開始的時間點
        print(f"Timestamp: {specified_timestamp}, Target: {target}, r2: {r2}, mse: {mse}, mae: {mae}, rmse: {rmse}, mape: {mape}")
        
        df = pd.DataFrame({
            "True Values": true_values,
            "Predictions": predictions
        })
        
        output_filename = f"{target_col}_predictions.xlsx"
        with pd.ExcelWriter(output_filename, engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=target_col, index=False)
        print(f"{target_col} predictions saved to {output_filename}")
        
    # 繪製真實值與預測值
    plt.figure(figsize=(20, 6))
    plt.plot(true_values, label="True Values", color="blue")
    plt.plot(predictions, label="Predictions", color="red", linestyle="--")
    
    # 使用轉換回原始尺度的最大和最小值設置y軸尺度
    plt.ylim(global_ymin_rescaled, global_ymax_rescaled)
    
    plt.title(f"Predictions vs True Values for {target_col}")
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)
    plt.legend()
    plt.show()

In [None]:
# T+1 模型評分
import numpy as np
from joblib import load
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# 定義計算平均絕對百分比誤差 (MAPE) 的函數
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# 函數，用於評估模型在不同數據集上的表現
def evaluate_model(data_set, model, scalers, target_col, time_steps, dataset_name):
    # 創建數據集
    X, y = create_dataset(data_set.drop(columns=[f'{target_col}_target_{i}' for i in range(1, 289)]),
                          data_set[f"{target_col}_target_1"], time_steps)
    
    # 檢查數據集是否為空
    if len(X) == 0:
        print(f"{dataset_name} is empty for {target_col}")
        return
    
    # 數據整形
    X = X.reshape((X.shape[0], -1))
    
    # 使用模型進行預測
    y_pred = model.predict(X)
    
    # 將預測值和真實值恢復到原始尺度
    y_pred_rescaled = scalers["scaler_" + target_col].inverse_transform(y_pred.reshape(-1, 1)).flatten()
    y_rescaled = scalers["scaler_" + target_col].inverse_transform(y.reshape(-1, 1)).flatten()

    # 計算性能指標
    r2 = r2_score(y_rescaled, y_pred_rescaled)
    mse = mean_squared_error(y_rescaled, y_pred_rescaled)
    mae = mean_absolute_error(y_rescaled, y_pred_rescaled)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_rescaled, y_pred_rescaled)
    
    print(f"{dataset_name} 為 {target_col} - r2: {r2:.4f}, mse: {mse:.4f}, mae: {mae:.4f}, rmse: {rmse:.4f}, mape: {mape:.4f}%")

# 評估模型
for target_col in target_columns:
    # 加載模型
    model = load(f"water_model4/model_{target_col}_t1.joblib")
    
    # 為訓練、驗證和測試數據集評估模型
    evaluate_model(train_data, model, scalers, target_col, time_steps, "Training Set")
    evaluate_model(val_data, model, scalers, target_col, time_steps, "Validation Set")
    evaluate_model(test_data, model, scalers, target_col, time_steps, "Test Set")


In [None]:
import numpy as np
from joblib import load
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def evaluate_model(data_set, model, scalers, target_col, time_steps, dataset_name):
        # 創建數據集
    X, y = create_dataset(data_set.drop(columns=[f'{target_col}_target_{i}' for i in range(1, 289)]),
                          data_set[f"{target_col}_target_1"], time_steps)
        # 檢查數據集是否為空
    if len(X) == 0:
        print(f"{dataset_name} is empty for {target_col}")
        return
    
    # 數據整形
    X = X.reshape((X.shape[0], -1))
    
    # 使用模型進行預測
    y_pred = model.predict(X)
    
    # 將預測值和真實值恢復到原始尺度
    y_pred_rescaled = scalers["scaler_" + target_col].inverse_transform(y_pred.reshape(-1, 1)).flatten()
    y_rescaled = scalers["scaler_" + target_col].inverse_transform(y.reshape(-1, 1)).flatten()

    # 計算性能指標
    r2 = r2_score(y_rescaled, y_pred_rescaled)
    mse = mean_squared_error(y_rescaled, y_pred_rescaled)
    mae = mean_absolute_error(y_rescaled, y_pred_rescaled)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_rescaled, y_pred_rescaled)
    
    print(f"{dataset_name} 為 {target_col} - r2: {r2:.4f}, mse: {mse:.4f}, mae: {mae:.4f}, rmse: {rmse:.4f}, mape: {mape:.4f}%")
    return {"r2": r2, "mse": mse, "mae": mae, "rmse": rmse, "mape": mape}

results = {}  # 儲存結果的字典

for t in range(1, 289):  # 從T1到T288
    for target_col in target_columns:
        # 根據T值加載模型
        model = load(f"water_model4/model_{target_col}_t{t}.joblib")
        
        # 評估模型
        results[f"{target_col}_T{t}"] = {
            "Training": evaluate_model(train_data, model, scalers, target_col, time_steps, "Training Set"),
            "Validation": evaluate_model(val_data, model, scalers, target_col, time_steps, "Validation Set"),
            "Test": evaluate_model(test_data, model, scalers, target_col, time_steps, "Test Set")
        }

# 繪圖組合r2和mape
plt.figure(figsize=(15, 8))

# 繪製r2 (左邊y軸)
ax1 = plt.gca()
for key, value in results.items():
    ax1.plot(key, value["Training"]["r2"], 'o-', label=f"Training r2 {key}")
    ax1.plot(key, value["Validation"]["r2"], 's-', label=f"Validation r2 {key}")
    ax1.plot(key, value["Test"]["r2"], 'd-', label=f"Test r2 {key}")

ax1.set_ylabel('r2')
ax1.set_title('r2 and MAPE over different models')
ax1.grid(True)

# 繪製mape (右邊y軸)
ax2 = ax1.twinx()
for key, value in results.items():
    ax2.plot(key, value["Training"]["mape"], 'o--', label=f"Training MAPE {key}")
    ax2.plot(key, value["Validation"]["mape"], 's--', label=f"Validation MAPE {key}")
    ax2.plot(key, value["Test"]["mape"], 'd--', label=f"Test MAPE {key}")

ax2.set_ylabel('MAPE (%)')

# 設定和顯示
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')
ax1.set_xticks(list(results.keys()))
ax1.set_xticklabels(list(results.keys()), rotation=45)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from joblib import load
from sklearn.metrics import r2_score, mean_absolute_error

# 定義計算平均絕對百分比誤差 (MAPE) 的函數
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def evaluate_model(data_set, model, scalers, target_col, time_steps, dataset_name):
    # 創建數據集
    X, y = create_dataset(data_set.drop(columns=[f'{target_col}_target_{i}' for i in range(1, 289)]),
                          data_set[f"{target_col}_target_1"], time_steps)
    
    # 檢查數據集是否為空
    if len(X) == 0:
        print(f"{dataset_name} is empty for {target_col}")
        return -1, -1  # 返回兩個無效的指標值
    
    # 數據整形
    X = X.reshape((X.shape[0], -1))
    
    # 使用模型進行預測
    y_pred = model.predict(X)
    
    # 將預測值和真實值恢復到原始尺度
    y_pred_rescaled = scalers["scaler_" + target_col].inverse_transform(y_pred.reshape(-1, 1)).flatten()
    y_rescaled = scalers["scaler_" + target_col].inverse_transform(y.reshape(-1, 1)).flatten()

    # 計算性能指標
    r2 = r2_score(y_rescaled, y_pred_rescaled)
    mape = mean_absolute_percentage_error(y_rescaled, y_pred_rescaled)
    
    return r2, mape

# 評估模型
r2_results = {
    "Training Set": [],
    "Validation Set": [],
    "Test Set": []
}
mape_results = {
    "Training Set": [],
    "Validation Set": [],
    "Test Set": []
}

datasets = {
    "Training Set": train_data,
    "Validation Set": val_data,
    "Test Set": test_data
}

for t in range(1, 289):  # 從t1到t288
    for dataset_name, dataset in datasets.items():
        current_r2 = []
        current_mape = []
        
        for target_col in target_columns:
            # 加載模型
            model = load(f"water_model4/model_{target_col}_t{t}.joblib")

            # 評估模型
            r2, mape = evaluate_model(dataset, model, scalers, target_col, time_steps, dataset_name)
            current_r2.append(r2)
            current_mape.append(mape)
        
        r2_results[dataset_name].append(np.mean(current_r2))
        mape_results[dataset_name].append(np.mean(current_mape))

# 繪製R^2和MAPE的趨勢圖
for dataset_name in datasets.keys():
    plt.figure(figsize=(12, 5))
    
    plt.subplot(1, 2, 1)
    plt.plot(range(1, 289), r2_results[dataset_name], '-o')
    plt.title(f"R^2 趨勢圖 ({dataset_name})")
    plt.xlabel("時間步")
    plt.ylabel("R^2")

    plt.subplot(1, 2, 2)
    plt.plot(range(1, 289), mape_results[dataset_name], '-o', color='red')
    plt.title(f"MAPE 趨勢圖 ({dataset_name})")
    plt.xlabel("時間步")
    plt.ylabel("MAPE (%)")
    
    plt.tight_layout()
    plt.show()


In [None]:
import matplotlib.pyplot as plt

# Plotting the R^2 and MAPE trends
for dataset_name in datasets.keys():
    fig, ax1 = plt.subplots(figsize=(12, 5))
    
    # Plotting R^2 on the left y-axis
    ax1.set_xlabel('Time Steps')
    ax1.set_ylabel('R^2', color='blue')
    ax1.plot(range(1, 289), r2_results[dataset_name], '-o', color='blue', label="R^2")
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.set_title(f"R^2 & MAPE Trend for {dataset_name}")
    ax1.set_ylim(-1, 1)  # Set R^2 scale to [-1, 1]

    # Drawing a line at R^2=0.8
    ax1.axhline(y=0.8, color='green', linestyle='--')
    
    # Finding the time step where R^2 goes below 0.8 for the first time
    try:
        x_pos = next(i for i, r2 in enumerate(r2_results[dataset_name]) if r2 < 0.8) + 1
        # Drawing a vertical line at the position where R^2 first goes below 0.8
        ax1.axvline(x=x_pos, color='green', linestyle='')

        # Offset for adjusting text position
        x_offset = 5  # This can be adjusted as needed
        y_offset = 0.01  # This can be adjusted as needed

        ax1.text(x_pos + x_offset, 0.8 - y_offset, str(x_pos), color='green', fontsize=30)
    except StopIteration:
        pass
    
    # Creating a second y-axis that shares the same x-axis with ax1
    ax2 = ax1.twinx()
    ax2.set_ylabel('MAPE', color='red')  # Removed the (%) for clarity, given new scale
    ax2.plot(range(1, 289), mape_results[dataset_name], '-o', color='red', label="MAPE")
    ax2.tick_params(axis='y', labelcolor='red')
    ax2.set_ylim(-1, 1)  # Set MAPE scale to [0, 1]

    # Display the legend
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc=0)
    
    fig.tight_layout()  # Ensure the plots don't overlap
    plt.show()


In [None]:
# 取出個別模型查看評估指標
from joblib import load
# 函數，用於評估模型在不同數據集上的表現
def evaluate_model(data_set, model, scalers, target_col, time_steps, dataset_name):
    # 創建數據集
    X, y = create_dataset(data_set.drop(columns=[f'{target_col}_target_{i}' for i in range(1, 289)]),
                          data_set[f"{target_col}_target_144"], time_steps)  
    
    # 檢查數據集是否為空
    if len(X) == 0:
        print(f"{dataset_name} is empty for {target_col}")
        return
    
    # 數據整形
    X = X.reshape((X.shape[0], -1))
    
    # 使用模型進行預測
    y_pred = model.predict(X)
    
    # 將預測值逆轉換回原始尺度
    y_pred_rescaled = scalers["scaler_" + target_col].inverse_transform(y_pred.reshape(-1, 1)).flatten()
    y_rescaled = scalers["scaler_" + target_col].inverse_transform(y.reshape(-1, 1)).flatten()
    
    # 計算和打印評估指標
    r2 = r2_score(y_rescaled, y_pred_rescaled)
    mse = mean_squared_error(y_rescaled, y_pred_rescaled)
    mae = mean_absolute_error(y_rescaled, y_pred_rescaled)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_rescaled, y_pred_rescaled)
    
    print(f"{dataset_name} for {target_col} - r2: {r2:.4f}, mse: {mse:.4f}, mae: {mae:.4f}, rmse: {rmse:.4f}, mape: {mape:.2f}%")
    
# 使用函數評估模型
model = load(f"water_model4/model_1510H057 Water Level_t144.joblib")
evaluate_model(train_data, model, scalers, "1510H057 Water Level", time_steps, "Training Set")
evaluate_model(val_data, model, scalers, "1510H057 Water Level", time_steps, "Validation Set")
evaluate_model(test_data, model, scalers, "1510H057 Water Level", time_steps, "Test Set")


In [None]:
# 繪製前20特徵重要性與儲存至Excel&圖片
import os
import joblib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

folder_path = r'C:\Users\rex\Desktop\git\data-processing\water_model4'

# 創建 'picture' 資料夾
picture_folder_path = os.path.join(folder_path, 'picture1')
if not os.path.exists(picture_folder_path):
    os.makedirs(picture_folder_path)

all_files = os.listdir(folder_path)

# 將模型文件名根據前綴進行分組
model_groups = {}
for file in all_files:
    if file.endswith('.joblib'):
        # 取得模型名稱前綴，例如：model_1510H057 Water Level
        prefix = "_".join(file.split("_")[:-1])
        if prefix not in model_groups:
            model_groups[prefix] = []
        model_groups[prefix].append(file)

for prefix, files in model_groups.items():
    cumulative_feature_importances = None
    features = None

    for file in files:
        model_path = os.path.join(folder_path, file)
        model = joblib.load(model_path)

        # 從模型中提取特徵名稱
        if features is None:
            features = model.feature_name_
        
        # 獲取特徵的重要性
        feature_importances = model.feature_importances_

        # 如果累積特徵重要性還沒有初始化，則使用第一個模型的特徵重要性初始化它
        if cumulative_feature_importances is None:
            cumulative_feature_importances = np.zeros_like(feature_importances)
        
        # 累積特徵重要性
        cumulative_feature_importances += feature_importances

    # 將累積特徵重要性排序
    sorted_idx = np.argsort(cumulative_feature_importances)[::-1]
    top_features = np.array(features)[sorted_idx]
    top_importances = cumulative_feature_importances[sorted_idx]
    
    # 儲存特徵重要性到Excel
    df = pd.DataFrame({
        'Feature': top_features,
        'Importance': top_importances
    })
    excel_name = f"{prefix}_Cumulative_Feature_Importance.xlsx"
    excel_path = os.path.join(folder_path, excel_name)
    df.to_excel(excel_path, index=False, engine='openpyxl')
    
    # 繪製累積特徵重要性
    plt.figure(figsize=(20, 12))
    plt.title(f"Cumulative Feature Importances for {prefix}")
    plt.bar(top_features[:20], top_importances[:20])
    plt.xticks(rotation=45)

    # 儲存圖片到 'picture' 資料夾中
    picture_name = f"{prefix}_Cumulative_Feature_Importance.png"
    picture_path = os.path.join(picture_folder_path, picture_name)
    plt.savefig(picture_path)

    # 關閉當前圖片
    plt.close()


In [None]:
import matplotlib.pyplot as plt

def plot_rainfall_histogram(data, start_timestamp):
    # 獲取指定時間之後的288小時的雨量數據
    start_time = pd.to_datetime(start_timestamp)
    end_time = start_time + pd.Timedelta(hours=288)
    subset_data = data.loc[start_time:end_time, '19335 Rainfall']
    
    # 使用 matplotlib 繪製直方圖
    fig, ax = plt.subplots(figsize=(12, 8)) # 可根據需求調整圖片尺寸
    ax.bar(subset_data.index, subset_data.values)
    ax.invert_yaxis()  # 倒置 y 軸 (時間軸)
    ax.xaxis.tick_top()  # x 軸移到上面
    plt.title(f"Rainfall Histogram from {start_timestamp} to {end_time}")
    plt.tight_layout()
    plt.show()
    
# 使用函數畫出指定時間的雨量直方圖
timestamp_to_plot = '2020-05-29 23:00:00' # 您可以修改此行以選擇不同的時間戳
plot_rainfall_histogram(data, timestamp_to_plot)


In [None]:
import pandas as pd

def export_real_rainfall_to_excel(data, start_timestamp, scalers, column_name='19335 Rainfall', filename="real_rainfall_data.xlsx"):
    # 獲取指定時間雨量
    start_time = pd.to_datetime(start_timestamp)
    end_time = start_time + pd.Timedelta(hours=360) # 時間(hr)
    subset_data = data.loc[start_time:end_time, column_name]

    # 逆轉換
    scaler = scalers['scaler_' + column_name]
    real_values = scaler.inverse_transform(subset_data.values.reshape(-1, 1))
    
    # 真實值放入Dataframe
    df_real = pd.DataFrame(real_values, index=subset_data.index, columns=[column_name])
    df_real.to_excel(filename)

timestamp_to_export = '2020-05-26 23:00:00' # 修改時間
export_real_rainfall_to_excel(data, timestamp_to_export, scalers)
