In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error


In [3]:
df = pd.read_csv('ds_clean/ds_clean_timeset.csv')
dfs = {location: group for location, group in df.groupby('LocationCode')}
for location, group in dfs.items():
    print(f"LocationCode: {location}")
    print(group)
    print()

LocationCode: 1
        LocationCode  WindSpeed(m/s)  Pressure(hpa)  Temperature(°C)  \
587855             1             0.0        1016.50             17.5   
587856             1             0.0        1016.53             17.5   
587857             1             0.0        1016.57             17.5   
587858             1             0.0        1016.58             17.5   
587859             1             0.0        1016.59             17.5   
...              ...             ...            ...              ...   
683763             1             0.0         959.80             25.1   
683764             1             0.0         959.92             25.1   
683765             1             0.0         960.09             25.1   
683766             1             0.0         959.40             25.1   
683767             1             0.0         959.02             25.0   

        Humidity(%)  Sunlight(Lux)  Power(mW)  DayOfYear  \
587855         86.8          25.00        0.0          1   

In [4]:
from datetime import datetime, timedelta
models = {}
for source in range(1,18):
    for destination in range(1,18):
        if source == destination:
            continue
        a = dfs[destination].copy()
        b = dfs[source].copy()
        base_year = 2024
        a['DateTime'] = a.apply(lambda row: datetime(base_year, 1, 1) + timedelta(days=int(row['DayOfYear'])-1, minutes=int(row['MinutesSinceStartOfDay'])), axis=1)
        b['DateTime'] = b.apply(lambda row: datetime(base_year, 1, 1) + timedelta(days=int(row['DayOfYear'])-1, minutes=int(row['MinutesSinceStartOfDay'])), axis=1)
        a['DateTime'] = pd.to_datetime(a['DateTime'])
        b['DateTime'] = pd.to_datetime(b['DateTime'])
        # 他有重複的資料(一分鐘內記了兩次)
        # print(a[a['IsDuplicate']==True])
        a = a.drop_duplicates(subset=['DateTime'])
        b = b.drop_duplicates(subset=['DateTime'])

        # print("Unique DateTime in a:", len(a['DateTime'].unique()))
        # print("Unique DateTime in b:", len(b['DateTime'].unique()))
        common_times = pd.merge(a, b, on='DateTime', how='inner')
        common_set = set(common_times['DateTime'])
        a_filtered = a[a['DateTime'].isin(common_set)]
        b_filtered = b[b['DateTime'].isin(common_set)]
        a_filtered = a_filtered.sort_values(by='DateTime').reset_index(drop=True)
        b_filtered = b_filtered.sort_values(by='DateTime').reset_index(drop=True)
        # print(a_filtered)
        # print(b_filtered)
        # print("Unique DateTime in a_filter:", len(a_filtered['DateTime'].unique()))
        # print("Unique DateTime in b_filter:", len(b_filtered['DateTime'].unique()))
        # print("Common DateTime in a and b:", len(common_times))
        # print("Duplicates in a['DateTime']:", a['DateTime'].duplicated().sum())
        # print("Duplicates in b['DateTime']:", b['DateTime'].duplicated().sum())
        # print("Duplicates in a_filtered['DateTime']:", a_filtered['DateTime'].duplicated().sum())
        # print("Duplicates in b_filtered['DateTime']:", b_filtered['DateTime'].duplicated().sum())
        # print("Duplicates in common_times['DateTime']:", common_times['DateTime'].duplicated().sum())
        y = a_filtered['Power(mW)']
        X = b_filtered.drop(columns=['DateTime'])
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        model = xgb.XGBRegressor()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        cumulative_mae = mae * len(y_test)
        print(f"cumulative_mae: {cumulative_mae}")
        models[(source, destination)] = model





cumulative_mae: 312332.25926169194
cumulative_mae: 245036.9123597714
cumulative_mae: 197795.53180653806
cumulative_mae: 228121.5248613134
cumulative_mae: 233791.49833582967
cumulative_mae: 177603.9400651443
cumulative_mae: 325960.18345845473
cumulative_mae: 181937.27963601513
cumulative_mae: 341487.46627299656
cumulative_mae: 59346.33468462864
cumulative_mae: 232973.2553127778
cumulative_mae: 275900.08319904946
cumulative_mae: 395687.90341517876
cumulative_mae: 560446.9147358203
cumulative_mae: 521794.55145473685
cumulative_mae: 699561.0210365807
cumulative_mae: 277734.01289700274
cumulative_mae: 158057.90849974976
cumulative_mae: 155072.5616315505
cumulative_mae: 194340.15138749348
cumulative_mae: 125756.21792539106
cumulative_mae: 65404.59332107365
cumulative_mae: 257093.36486381534
cumulative_mae: 175928.0385940008
cumulative_mae: 245247.521769728
cumulative_mae: 57186.88012940241
cumulative_mae: 175701.1544527458
cumulative_mae: 262534.4280212948
cumulative_mae: 274786.0921819386
c

In [7]:
day_minute_model = dfs.copy()
time_models = {}
for i in range(1,18):
    day_minute_model[i]  = day_minute_model[i][["MinutesSinceStartOfDay","DayOfYear","Power(mW)"]]
    day_minute_model[i] = day_minute_model[i].drop_duplicates(subset=['MinutesSinceStartOfDay','DayOfYear'])

for i in range(1,18):
    X = day_minute_model[i][["MinutesSinceStartOfDay","DayOfYear"]]
    y = day_minute_model[i][["Power(mW)"]]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model = xgb.XGBRegressor()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    cumulative_mae = mae * len(y_test)
    print(f"cumulative_mae: {cumulative_mae}")
    time_models[i] = model


cumulative_mae: 1517070.3631237214
cumulative_mae: 1260826.996935185
cumulative_mae: 745330.895224825
cumulative_mae: 694145.0578414209
cumulative_mae: 688974.7766626432
cumulative_mae: 678851.9754844315
cumulative_mae: 479467.34663225035
cumulative_mae: 928925.2923597563
cumulative_mae: 495313.0344478755
cumulative_mae: 889853.452668326
cumulative_mae: 151466.93068290822
cumulative_mae: 635411.7458970686
cumulative_mae: 642133.8448881968
cumulative_mae: 823152.6794318502
cumulative_mae: 1032029.9545584616
cumulative_mae: 916024.0261445248
cumulative_mae: 1302142.3623163581


In [44]:
base_year = 2024
second_layer_models = {}
for destination in range(1,18):
    destination_unique_time = dfs[destination].drop_duplicates(subset=['MinutesSinceStartOfDay','DayOfYear']).copy()
    destination_unique_time['DateTime'] = destination_unique_time.apply(lambda row: datetime(base_year, 1, 1) + timedelta(days=int(row['DayOfYear'])-1, minutes=int(row['MinutesSinceStartOfDay'])), axis=1)
    unique_timeset = set(destination_unique_time['DateTime'])
    merged_rest = []
    for source in [k for k in range(1, 18) if k != destination]:
        value = dfs[source].copy()
        value['DateTime'] = value.apply(lambda row: datetime(base_year, 1, 1) + timedelta(days=int(row['DayOfYear'])-1, minutes=int(row['MinutesSinceStartOfDay'])), axis=1)
        value.drop_duplicates(subset=['DateTime'], inplace=True)
        merged_rest.append(value)

    merged_rest_dfs = pd.concat(merged_rest, ignore_index=True)
    merged_rest_dfs['DateTime'] = merged_rest_dfs.apply(lambda row: datetime(base_year, 1, 1) + timedelta(days=int(row['DayOfYear'])-1, minutes=int(row['MinutesSinceStartOfDay'])), axis=1)
    merged_rest_dfs['anticipated_power'] = None

    for source in [k for k in range(1, 18) if k != destination]:
        subset = merged_rest_dfs[merged_rest_dfs['LocationCode'] == source]
        features = subset.drop(columns=['DateTime', 'anticipated_power'])
        predictions = models[(source, destination)].predict(features)
        predictions = predictions.astype(float)
        merged_rest_dfs.loc[merged_rest_dfs['LocationCode'] == source, 'anticipated_power'] = predictions

    merged_rest_dfs['anticipated_power'] = pd.to_numeric(merged_rest_dfs['anticipated_power'], errors='coerce')
    second_train = merged_rest_dfs[['DayOfYear', 'MinutesSinceStartOfDay', 'DateTime', 'LocationCode', 'anticipated_power']]

    # 將資料 pivot，使每個 LocationCode 都會有自己的 anticipated_power 欄位
    second_train_pivot = second_train.pivot(index=['DayOfYear', 'MinutesSinceStartOfDay', 'DateTime'], 
                        columns='LocationCode', values='anticipated_power')

    # 將欄位名稱改成想要的格式，如 LocationXAnticipated
    second_train_pivot.columns = [f"Location{int(col)}Anticipated" for col in second_train_pivot.columns]

    # 將原本的複合索引重設為欄位，這樣 DayOfYear、MinutesSinceStartOfDay、DateTime 都會變成普通的 columns
    second_train_pivot = second_train_pivot.reset_index()
    self_features = second_train_pivot[[ 'MinutesSinceStartOfDay','DayOfYear']]
    self_predictions = time_models[1].predict(self_features)
    second_train_pivot['Location1Anticipated'] = self_predictions
    
    
    second_train_merged = pd.merge(destination_unique_time, second_train_pivot, on=['DateTime'], how='inner')
    second_train_merged.rename(columns={'Power(mW)': 'Location1Actual'}, inplace=True)
    X = second_train_merged.drop(columns=['DateTime', 'Location1Actual'])
    y = second_train_merged['Location1Actual']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model = xgb.XGBRegressor(missing=np.nan, enable_categorical=True)
    print(X_train.dtypes)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    cumulative_mae = mae * len(y_test)
    print(f"cumulative_mae: {cumulative_mae} ")
    print(f"mae: {mae}")
    second_layer_models[destination] = model

# a_locaiton_unique_time = dfs[1].drop_duplicates(subset=['MinutesSinceStartOfDay','DayOfYear']).copy()
# a_locaiton_unique_time['DateTime'] = a_locaiton_unique_time.apply(lambda row: datetime(base_year, 1, 1) + timedelta(days=int(row['DayOfYear'])-1, minutes=int(row['MinutesSinceStartOfDay'])), axis=1)
# unique_timeset = set(a_locaiton_unique_time['DateTime'])
# merged_rest = []
# for key in [k for k in range(2, 18) if k in dfs]:
#     value = dfs[key].copy()
#     value['DateTime'] = value.apply(lambda row: datetime(base_year, 1, 1) + timedelta(days=int(row['DayOfYear'])-1, minutes=int(row['MinutesSinceStartOfDay'])), axis=1)
#     value.drop_duplicates(subset=['DateTime'], inplace=True)
#     merged_rest.append(value)

# merged_rest_dfs = pd.concat(merged_rest, ignore_index=True)
    

# merged_rest_dfs['DateTime'] = merged_rest_dfs.apply(lambda row: datetime(base_year, 1, 1) + timedelta(days=int(row['DayOfYear'])-1, minutes=int(row['MinutesSinceStartOfDay'])), axis=1)
# merged_rest_dfs['anticipated_power'] = None
# merged_rest_dfs

LocationCode                  int64
WindSpeed(m/s)              float64
Pressure(hpa)               float64
Temperature(°C)             float64
Humidity(%)                 float64
Sunlight(Lux)               float64
DayOfYear_x                   int64
MinutesSinceStartOfDay_x      int64
DayOfYear_y                   int64
MinutesSinceStartOfDay_y      int64
Location2Anticipated        float64
Location3Anticipated        float64
Location4Anticipated        float64
Location5Anticipated        float64
Location6Anticipated        float64
Location7Anticipated        float64
Location8Anticipated        float64
Location9Anticipated        float64
Location10Anticipated       float64
Location11Anticipated       float64
Location12Anticipated       float64
Location13Anticipated       float64
Location14Anticipated       float64
Location15Anticipated       float64
Location16Anticipated       float64
Location17Anticipated       float64
Location1Anticipated        float32
dtype: object
cumulative_mae

In [None]:
# for i in range(2, 18):
#     subset = merged_rest_dfs[merged_rest_dfs['LocationCode'] == i]
#     features = subset.drop(columns=['DateTime', 'anticipated_power'])
#     predictions = models[(i, 1)].predict(features)
#     predictions = predictions.astype(float)
#     merged_rest_dfs.loc[merged_rest_dfs['LocationCode'] == i, 'anticipated_power'] = predictions

# merged_rest_dfs['anticipated_power'] = pd.to_numeric(merged_rest_dfs['anticipated_power'], errors='coerce')

# merged_rest_dfs


Unnamed: 0,LocationCode,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),DayOfYear,MinutesSinceStartOfDay,DateTime,anticipated_power
0,2,0.00,1013.49,26.46,42.99,18575.83,61.29,17,928,2024-01-17 15:28:00,67.165520
1,2,0.52,1013.47,26.33,43.78,18635.00,62.20,17,929,2024-01-17 15:29:00,66.400391
2,2,0.35,1013.49,26.26,43.84,15628.33,42.12,17,936,2024-01-17 15:36:00,45.420399
3,2,0.35,1013.48,26.12,46.14,14980.83,39.39,17,939,2024-01-17 15:39:00,45.420399
4,2,1.48,1013.47,26.01,47.97,15128.33,40.54,17,940,2024-01-17 15:40:00,45.504864
...,...,...,...,...,...,...,...,...,...,...,...
1097463,17,0.00,1004.05,36.50,65.40,12315.00,35.91,194,926,2024-07-12 15:26:00,283.349518
1097464,17,0.00,1004.08,36.50,64.70,12692.50,38.23,194,927,2024-07-12 15:27:00,283.349518
1097465,17,0.00,1004.10,36.50,64.50,12960.00,39.95,194,928,2024-07-12 15:28:00,240.087509
1097466,17,0.00,1004.11,36.40,64.90,12751.67,38.51,194,929,2024-07-12 15:29:00,240.087509


In [35]:
# second_train = merged_rest_dfs[['DayOfYear', 'MinutesSinceStartOfDay', 'DateTime', 'LocationCode', 'anticipated_power']]

# # 將資料 pivot，使每個 LocationCode 都會有自己的 anticipated_power 欄位
# second_train_pivot = second_train.pivot(index=['DayOfYear', 'MinutesSinceStartOfDay', 'DateTime'], 
#                     columns='LocationCode', values='anticipated_power')

# # 將欄位名稱改成想要的格式，如 LocationXAnticipated
# second_train_pivot.columns = [f"Location{int(col)}Anticipated" for col in second_train_pivot.columns]

# # 將原本的複合索引重設為欄位，這樣 DayOfYear、MinutesSinceStartOfDay、DateTime 都會變成普通的 columns
# second_train_pivot = second_train_pivot.reset_index()

# # 現在 df_pivot 裡面您就有：
# # DayOfYear, MinutesSinceStartOfDay, DateTime (作為特徵欄位)
# # 以及 Location1Anticipated, Location2Anticipated, ... 等欄位

# # second_train_pivot.drop(columns=['DateTime'], inplace=True)
# second_train_pivot


Unnamed: 0,DayOfYear,MinutesSinceStartOfDay,DateTime,Location2Anticipated,Location3Anticipated,Location4Anticipated,Location5Anticipated,Location6Anticipated,Location7Anticipated,Location8Anticipated,Location9Anticipated,Location10Anticipated,Location11Anticipated,Location12Anticipated,Location13Anticipated,Location14Anticipated,Location15Anticipated,Location16Anticipated,Location17Anticipated
0,1,389,2024-01-01 06:29:00,,,,,,,,,,,,,,,,8.299028
1,1,390,2024-01-01 06:30:00,,,,,,,,,,,,,,,,8.299028
2,1,391,2024-01-01 06:31:00,,,,,,,,,,,,,,,,8.299028
3,1,392,2024-01-01 06:32:00,,,,,,,,,,,,,,,,8.299028
4,1,393,2024-01-01 06:33:00,,,,,,,,,,,,,,,,8.299028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167883,244,907,2024-08-31 15:07:00,,,,,,,204.294037,,,,,,,,,
167884,244,908,2024-08-31 15:08:00,,,,,,,205.317581,,,,,,,,,
167885,244,909,2024-08-31 15:09:00,,,,,,,154.533005,,,,,,,,,
167886,244,910,2024-08-31 15:10:00,,,,,,,169.485428,,,,,,,,,


In [None]:
# self_features = second_train_pivot[[ 'MinutesSinceStartOfDay','DayOfYear']]
# self_predictions = time_models[1].predict(self_features)
# second_train_pivot['Location1Anticipated'] = self_predictions
# second_train_pivot

Unnamed: 0,DayOfYear,MinutesSinceStartOfDay,DateTime,Location2Anticipated,Location3Anticipated,Location4Anticipated,Location5Anticipated,Location6Anticipated,Location7Anticipated,Location8Anticipated,Location9Anticipated,Location10Anticipated,Location11Anticipated,Location12Anticipated,Location13Anticipated,Location14Anticipated,Location15Anticipated,Location16Anticipated,Location17Anticipated,Location1Anticipated
0,1,389,2024-01-01 06:29:00,,,,,,,,,,,,,,,,8.299028,0.503504
1,1,390,2024-01-01 06:30:00,,,,,,,,,,,,,,,,8.299028,0.503504
2,1,391,2024-01-01 06:31:00,,,,,,,,,,,,,,,,8.299028,2.417047
3,1,392,2024-01-01 06:32:00,,,,,,,,,,,,,,,,8.299028,2.417047
4,1,393,2024-01-01 06:33:00,,,,,,,,,,,,,,,,8.299028,2.417047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167883,244,907,2024-08-31 15:07:00,,,,,,,204.294037,,,,,,,,,,2.372569
167884,244,908,2024-08-31 15:08:00,,,,,,,205.317581,,,,,,,,,,2.372569
167885,244,909,2024-08-31 15:09:00,,,,,,,154.533005,,,,,,,,,,2.372569
167886,244,910,2024-08-31 15:10:00,,,,,,,169.485428,,,,,,,,,,2.372569


In [37]:
# second_train_pivot.dtypes

DayOfYear                          int64
MinutesSinceStartOfDay             int64
DateTime                  datetime64[ns]
Location2Anticipated             float64
Location3Anticipated             float64
Location4Anticipated             float64
Location5Anticipated             float64
Location6Anticipated             float64
Location7Anticipated             float64
Location8Anticipated             float64
Location9Anticipated             float64
Location10Anticipated            float64
Location11Anticipated            float64
Location12Anticipated            float64
Location13Anticipated            float64
Location14Anticipated            float64
Location15Anticipated            float64
Location16Anticipated            float64
Location17Anticipated            float64
Location1Anticipated             float32
dtype: object

In [None]:
# second_train_merged = pd.merge(a_locaiton_unique_time, second_train_pivot, on=['DateTime'], how='inner')
# second_train_merged.rename(columns={'Power(mW)': 'Location1Actual'}, inplace=True)
# X = second_train_merged.drop(columns=['DateTime', 'Location1Actual'])
# y = second_train_merged['Location1Actual']
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# model = xgb.XGBRegressor(missing=np.nan, enable_categorical=True)
# print(X_train.dtypes)
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# mae = mean_absolute_error(y_test, y_pred)
# cumulative_mae = mae * len(y_test)
# print(f"mae: {mae}")


LocationCode                  int64
WindSpeed(m/s)              float64
Pressure(hpa)               float64
Temperature(°C)             float64
Humidity(%)                 float64
Sunlight(Lux)               float64
DayOfYear_x                   int64
MinutesSinceStartOfDay_x      int64
DayOfYear_y                   int64
MinutesSinceStartOfDay_y      int64
Location2Anticipated        float64
Location3Anticipated        float64
Location4Anticipated        float64
Location5Anticipated        float64
Location6Anticipated        float64
Location7Anticipated        float64
Location8Anticipated        float64
Location9Anticipated        float64
Location10Anticipated       float64
Location11Anticipated       float64
Location12Anticipated       float64
Location13Anticipated       float64
Location14Anticipated       float64
Location15Anticipated       float64
Location16Anticipated       float64
Location17Anticipated       float64
Location1Anticipated        float32
dtype: object
mae: 10.548059

## 驗證

In [45]:
question = pd.read_csv('ds_clean/upload.csv')
# 将 '序號' 列转换为字符串
question['序號'] = question['序號'].astype(str)

# 提取 DayOfYear, MinutesSinceStartOfDay 和 LocationCode
question['DayOfYear'] = pd.to_datetime(question['序號'].str[:8], format='%Y%m%d').dt.dayofyear
question['MinutesSinceStartOfDay'] = question['序號'].str[8:12].apply(lambda x: int(x[:2]) * 60 + int(x[2:4]))
question['LocationCode'] = question['序號'].str[12:14].astype(int)

# 删除原来的 '序號' 列
question = question.drop(columns=['序號'])
question 

Unnamed: 0,答案,DayOfYear,MinutesSinceStartOfDay,LocationCode
0,,1,540,1
1,,1,550,1
2,,1,560,1
3,,1,570,1
4,,1,580,1
...,...,...,...,...
9595,,193,970,17
9596,,193,980,17
9597,,193,990,17
9598,,193,1000,17


In [79]:
for i in range(1, 18):
    question[f"Location{i}Anticipated"] = None

question
for i in range(1,18):
    self_predictions_subset = question[question['LocationCode'] == i][[ 'MinutesSinceStartOfDay','DayOfYear']]
    self_predictions = time_models[i].predict(self_predictions_subset)
    question.loc[question['LocationCode'] == i, f'Location{i}Anticipated'] = self_predictions

merged = question.merge(df, on=['DayOfYear', 'MinutesSinceStartOfDay'], how='left')


# 處理 _x 和 _y 的情況
if 'LocationCode_x' in merged.columns and 'LocationCode_y' in merged.columns:
    merged['LocationCode'] = merged['LocationCode_x']  # 保留一個欄位
    merged.drop(columns=['LocationCode_x', 'LocationCode_y'], inplace=True)

expected_columns = [
    'LocationCode',
    'WindSpeed(m/s)',
    'Pressure(hpa)',
    'Temperature(°C)',
    'Humidity(%)',
    'Sunlight(Lux)',
    'Power(mW)',
    'DayOfYear',
    'MinutesSinceStartOfDay'
]

# 先一次性處理所有特徵，使整個 DataFrame 型別正確、移除不必要欄位
merged_numeric = merged.drop(columns=['DateTime', 'anticipated_power', "答案"], errors='ignore')
merged_numeric = merged_numeric.apply(pd.to_numeric, errors='coerce')
merged_numeric = merged_numeric.drop(
    columns=[col for col in merged_numeric.columns if col.startswith('Location') and col.endswith('Anticipated')],
    errors='ignore'
)

# 確保特徵欄位存在於 merged_numeric 中
for col in expected_columns:
    if col not in merged_numeric.columns:
        raise ValueError(f"缺少必要的特徵欄位: {col}")

# 我們要為每個 i (1~17) 建立預測欄位

for i in range(1, 18):
    merged[f'Location{i}Anticipated'] = None  # 先初始化為 None
print(merged.columns)
print(merged.shape)
# 接下來我們用整批處理的方式而非逐行 apply
# 對於每一個 LocationCode i (可能是目標地點代碼)，
# 並針對 subset[LocationCode == i] 用對應模型進行預測，將結果寫入 merged[f'Location{i}Anticipated']。

# 首先，依據 LocationCode 群組處理
grouped = merged_numeric.groupby('LocationCode')
for code_in_subset, grp in grouped:
    # grp 是同一個 LocationCode 的所有資料列
    # 根據題意，我們要對每個 (i, code_in_subset) 執行預測，i != code_in_subset
    # 所以 i 要從 1 跑到 17，略過 i == code_in_subset
    for i in range(1, 18):
        if i == code_in_subset:
            continue
        # 選擇該分組的資料，取得特徵
        feats = grp[expected_columns]
        # 使用對應的模型 (i, code_in_subset) 進行預測
        # 假設 models 是一個 dict，其中 key 為 (i, code_in_subset)
        # 並且已經建立對應的模型
        model_key = (i, code_in_subset)
        if model_key not in models:
            # 若沒有對應的模型，可以選擇跳過或報錯
            continue
        predictions = models[model_key].predict(feats)

        # 將預測結果回填到 merged 中對應的 rows
        merged.loc[grp.index, f'Location{i}Anticipated'] = predictions.astype(float)

# 最後將結果寫回 question (只跑一次整批)
for i in range(1, 18):
    question[f'Location{i}Anticipated'] = merged[f'Location{i}Anticipated']

merged




Index(['答案', 'DayOfYear', 'MinutesSinceStartOfDay', 'Location1Anticipated',
       'Location2Anticipated', 'Location3Anticipated', 'Location4Anticipated',
       'Location5Anticipated', 'Location6Anticipated', 'Location7Anticipated',
       'Location8Anticipated', 'Location9Anticipated', 'Location10Anticipated',
       'Location11Anticipated', 'Location12Anticipated',
       'Location13Anticipated', 'Location14Anticipated',
       'Location15Anticipated', 'Location16Anticipated',
       'Location17Anticipated', 'WindSpeed(m/s)', 'Pressure(hpa)',
       'Temperature(°C)', 'Humidity(%)', 'Sunlight(Lux)', 'Power(mW)',
       'LocationCode'],
      dtype='object')
(70007, 27)


Unnamed: 0,答案,DayOfYear,MinutesSinceStartOfDay,Location1Anticipated,Location2Anticipated,Location3Anticipated,Location4Anticipated,Location5Anticipated,Location6Anticipated,Location7Anticipated,...,Location15Anticipated,Location16Anticipated,Location17Anticipated,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW),LocationCode
0,,1,540,,-13.56834,39.548126,14.759489,-20.418625,29.959755,-87.673378,...,827.397461,34.178524,68.446228,0.00,1024.43,19.00,65.30,5677.50,7.08,1
1,,1,550,,-15.820904,37.904057,13.425326,-20.418625,29.959755,-87.673378,...,844.112488,34.178524,-5.334711,0.00,1024.43,19.00,65.30,5152.50,5.97,1
2,,1,560,,-10.620915,44.731522,17.094324,-14.616568,32.710274,-103.730293,...,842.528198,118.676834,152.268265,0.00,1024.47,19.10,64.90,7272.50,11.50,1
3,,1,570,,-5.308188,48.661247,34.095325,-18.301979,39.746971,-76.00882,...,753.319275,124.089844,490.567383,0.00,1024.68,19.20,64.90,8965.00,16.63,1
4,,1,580,,24.769348,48.24855,36.757183,-12.259192,41.580494,-73.703255,...,763.43219,128.649719,649.495667,0.00,1024.59,19.30,64.90,9902.50,20.72,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70002,,193,1010,2.531712,23.618324,71.445457,65.391815,26.232336,21.590908,-1.563293,...,165.736313,-31.128788,,0.00,1000.02,36.19,52.49,7743.33,18.11,17
70003,,193,1010,-31.788208,167.139557,18.012224,56.156219,1.142862,52.134338,3.79005,...,346.859375,131.804688,,0.61,999.80,37.03,57.64,11121.67,24.85,17
70004,,193,1010,29.39588,36.092323,25.802542,59.406269,1.479751,31.174267,3.986486,...,227.004074,25.050169,,0.00,999.32,38.18,47.47,9539.17,25.34,17
70005,,193,1010,0.502084,3.488981,-11.386699,43.404099,4.088176,4.285177,-4.958761,...,5.534564,-192.240555,,0.00,1000.20,34.53,75.54,4860.83,5.48,17
