In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

# 1. Load the data
data = pd.read_csv("./merged_result.csv")

# 2. Feature preprocessing
# Wind direction is converted from degrees to radians and replaced by its sine.
# NOTE(review): sine alone maps distinct directions (e.g. 0 and 180 degrees) to the
# same value; consider adding a cosine component as a second feature -- confirm.
data['풍향'] = np.sin(np.deg2rad(data['풍향']))
# One-hot encode the 'time' column; the feature list below expects the resulting
# columns to be named time_0 ... time_23.
time_ohe = pd.get_dummies(data['time'], prefix='time')
data_ohe = pd.concat([data, time_ohe], axis=1)

# Continuous weather features that are min-max scaled (after the split, see below).
features_to_scale = ['강수확률', '일최저기온', '1시간기온', '하늘상태', 
                  '일최고기온', '습도', '풍향', '1시간강수량', '풍속']

# 3. Train/test split, then scaling
input_features = features_to_scale + [f'time_{i}' for i in range(24)]
target_variables = ['수평면', '외기온도', '경사면', '모듈온도']
X = data_ohe[input_features]
y = data_ohe[target_variables]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows, so that
# no statistics of the held-out data leak into preprocessing. Test values outside
# the training range may therefore fall slightly outside [0, 1].
# Explicit copies avoid writing into frames derived from `X`.
X_train = X_train.copy()
X_test = X_test.copy()
scaler = MinMaxScaler()
X_train[features_to_scale] = scaler.fit_transform(X_train[features_to_scale])
X_test[features_to_scale] = scaler.transform(X_test[features_to_scale])

# 4. Model and hyperparameter search setup
# Candidate values explored exhaustively for the gradient boosting regressor
# (3 values for each of 5 parameters).
param_grid = dict(
    n_estimators=[50, 100, 150],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[3, 4, 5],
    subsample=[0.8, 0.9, 1],
    max_features=['sqrt', 'log2', None],
)

# Seeded base estimator; the search uses 5-fold cross-validation on all CPU cores.
base_regressor = GradientBoostingRegressor(random_state=42)
grid_search = GridSearchCV(
    base_regressor,
    param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# 5. Tune one model per target and predict on the test set
# The same GridSearchCV object is refitted for every target, which overwrites its
# fitted state each time. The winning estimator and hyperparameters are therefore
# stored per target so they remain available after the loop.
predictions = {}
best_models = {}   # target name -> refitted best estimator
best_params = {}   # target name -> best hyperparameter combination
for target in target_variables:
    grid_search.fit(X_train, y_train[target])
    best_models[target] = grid_search.best_estimator_
    best_params[target] = grid_search.best_params_
    predictions[target] = grid_search.predict(X_test)

# Compare predictions with the actual values: one two-column frame per target,
# holding the test-set ground truth next to the model output.
comparison_dfs = {
    name: pd.DataFrame({
        f"Actual {name}": y_test[name],
        f"Predicted {name}": predictions[name],
    })
    for name in target_variables
}

Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Fitting 5 folds for each of 243 candidates, totalling 1215 fits


In [3]:

# Print the first rows of the actual-vs-predicted comparison for each target.
for shown_target in ('외기온도', '수평면', '모듈온도', '경사면'):
    print(comparison_dfs[shown_target].head())

      Actual 외기온도  Predicted 외기온도
1318         24.2       25.298652
899          43.0       40.873729
439          24.5       26.447077
560          23.9       29.841131
433          24.6       24.823679
      Actual 수평면  Predicted 수평면
1318         0.0       9.241344
899        626.0     638.249490
439         42.0      76.366948
560         33.0     159.936186
433          0.0      23.313487
      Actual 모듈온도  Predicted 모듈온도
1318         22.8       24.015430
899          52.2       46.581605
439          22.8       24.427405
560          23.0       27.500278
433          23.0       23.926920
      Actual 경사면  Predicted 경사면
1318         0.0       3.855825
899        674.0     669.280840
439         27.0      39.186481
560         32.0     131.703560
433          0.0      27.554915
