In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

# 1. Load the data
data = pd.read_csv("./merged_result.csv")

# 2. Feature preprocessing
# Wind direction (degrees) is mapped onto its sine.
# NOTE(review): sine alone cannot distinguish e.g. 0 deg from 180 deg; consider
# adding a cosine component if wind direction turns out to matter.
data['풍향'] = np.sin(np.deg2rad(data['풍향']))

features_to_scale = ['강수확률', '일최저기온', '하늘상태', '일최고기온', '습도', '풍향', '풍속']
# Treated as an important feature and therefore scaled separately.
important_features = ['time']
input_features = features_to_scale + important_features
target_variables = ['수평면', '외기온도', '경사면', '모듈온도']

# 3. Decide the train/test partition BEFORE fitting any scaler, so the
# statistics (min/max) of the held-out rows never leak into preprocessing.
# Splitting row positions with the same test_size/random_state yields the same
# partition as splitting X and y directly, because the shuffle depends only on
# the number of rows.
row_positions = np.arange(len(data))
train_rows, test_rows = train_test_split(row_positions, test_size=0.2, random_state=42)

# Fit each scaler on the training rows only, then transform every row.
# Test-set values may therefore fall slightly outside the nominal range.
scaler = MinMaxScaler()
scaler.fit(data.iloc[train_rows][features_to_scale])
data[features_to_scale] = scaler.transform(data[features_to_scale])

# NOTE(review): the (0.5, 1) range was presumably chosen to emphasise 'time';
# tree-based models split on thresholds, so the range itself is unlikely to
# change how much weight the feature receives — confirm the intent.
scaler_important = MinMaxScaler(feature_range=(0.5, 1))
scaler_important.fit(data.iloc[train_rows][important_features])
data[important_features] = scaler_important.transform(data[important_features])

X = data[input_features]
y = data[target_variables]
X_train, X_test = X.iloc[train_rows], X.iloc[test_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]

# 4. Model training and hyperparameter tuning
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 0.9, 1],
    'max_features': ['sqrt', 'log2', None]
}

# 5. Predictions on the test set
# One independent grid search per target. The tuned estimator and its
# parameters are kept so they are not lost when the next target is fitted.
predictions = {}
best_models = {}
best_params = {}
for target in target_variables:
    grid_search = GridSearchCV(GradientBoostingRegressor(random_state=42), param_grid, cv=5, n_jobs=-1, verbose=1)
    grid_search.fit(X_train, y_train[target])
    best_models[target] = grid_search.best_estimator_
    best_params[target] = grid_search.best_params_
    predictions[target] = grid_search.predict(X_test)

# Compare predicted values with the actual values, one frame per target.
# Each frame is indexed like y_test, so rows can be traced back to `data`.
comparison_dfs = {}
for target in target_variables:
    comparison_dfs[target] = pd.DataFrame({
        f"Actual {target}": y_test[target],
        f"Predicted {target}": predictions[target]
    })

Fitting 5 folds for each of 243 candidates, totalling 1215 fits


Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Fitting 5 folds for each of 243 candidates, totalling 1215 fits


In [19]:
# Print the first rows of the actual-vs-predicted comparison for each target,
# starting with '외기온도'.
for shown_target in ('외기온도', '수평면', '모듈온도', '경사면'):
    print(comparison_dfs[shown_target].head())

      Actual 외기온도  Predicted 외기온도
1318         24.2       24.969775
899          43.0       41.315395
439          24.5       26.471294
560          23.9       29.403372
433          24.6       25.136857
      Actual 수평면  Predicted 수평면
1318         0.0       0.286919
899        626.0     554.205602
439         42.0      61.468814
560         33.0     140.669069
433          0.0       2.910868
      Actual 모듈온도  Predicted 모듈온도
1318         22.8       24.193406
899          52.2       46.031512
439          22.8       24.467112
560          23.0       27.138005
433          23.0       23.951652
      Actual 경사면  Predicted 경사면
1318         0.0       6.736264
899        674.0     502.709700
439         27.0      44.048440
560         32.0     111.688614
433          0.0      15.789200
