In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# Preprocessing: load the merged dataset, encode the wind direction and
# min-max scale the model inputs.
data = pd.read_csv("./merged_result.csv")

# The '풍향' column is treated as an angle in degrees and replaced by its sine.
# NOTE(review): keeping only the sine gives directions θ and 180°−θ the same
# value — consider adding a cosine component if direction matters.
wind_direction_rad = np.deg2rad(data['풍향'])
data['풍향'] = np.sin(wind_direction_rad)

# Column groups: regular inputs are scaled to the default [0, 1] range, the
# "important" input ('time') is scaled to [0.5, 1].
features_to_scale = ['강수확률', '일최저기온', '하늘상태', '일최고기온', '습도', '풍향', '풍속']
important_features = ['time']

# NOTE(review): both scalers are fit on the full dataset, i.e. before the
# train/validation/test split performed further down.
scaler = MinMaxScaler()
scaler.fit(data[features_to_scale])
data[features_to_scale] = scaler.transform(data[features_to_scale])

scaler_important = MinMaxScaler(feature_range=(0.5, 1))
scaler_important.fit(data[important_features])
data[important_features] = scaler_important.transform(data[important_features])

# Data split: build the feature matrix / target frame, then carve out
# 60% train, 20% validation and 20% test (40% is held out first and that
# hold-out is halved).
target_variables = ['수평면', '외기온도', '경사면', '모듈온도']
input_features = [*features_to_scale, *important_features]
X, y = data[input_features], data[target_variables]

X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Hyperparameter tuning: an exhaustive 5-fold grid search is run separately
# for every target column. The grid holds 3**5 = 243 candidates, so each
# target costs 243 * 5 cross-validation fits.
# No `scoring` argument is passed, so GridSearchCV uses the estimator's
# default scorer.
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 0.9, 1],
    'max_features': ['sqrt', 'log2', None],
}

# Maps target column name -> best parameter dict found for that target.
best_params = {}

for target in target_variables:
    grid_search = GridSearchCV(
        estimator=GradientBoostingRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        n_jobs=-1,
        verbose=1,
    ).fit(X_train, y_train[target])  # fit() returns the fitted search object
    best_params[target] = grid_search.best_params_

# Train one model per target with the tuned hyperparameters, choose the
# ensemble size by early stopping on the validation set, and evaluate on the
# test set.
#
# Fixes compared with the previous version of this cell:
#   * The model used for the test metrics is now the one that achieved the
#     lowest validation error. Previously it was whatever model was fitted
#     last, i.e. up to 10 trees past the best size.
#   * The ensemble is fitted once and every boosting stage is scored with
#     staged_predict, instead of refitting from scratch for each candidate
#     size.
#   * The test prediction is computed once instead of twice.
test_errors = {}   # target column name -> test-set MSE
predictions = {}   # target column name -> test-set predictions
for target in target_variables:
    # The grid-searched n_estimators is dropped: the ensemble size is chosen
    # on the validation set below.
    tuned_params = {
        key: value
        for key, value in best_params[target].items()
        if key != 'n_estimators'
    }

    # Grow the largest candidate once (the search range is 1..119 trees).
    gbr = GradientBoostingRegressor(**tuned_params, n_estimators=119, random_state=42)
    gbr.fit(X_train, y_train[target])

    min_val_error = float("inf")
    best_n_estimators = 1
    error_going_up = 0
    # staged_predict yields the prediction after 1, 2, ... boosting stages.
    for n_estimators, y_pred_val in enumerate(gbr.staged_predict(X_val), start=1):
        val_error = mean_squared_error(y_val[target], y_pred_val)
        if val_error < min_val_error:
            min_val_error = val_error
            best_n_estimators = n_estimators
            error_going_up = 0
        else:
            error_going_up += 1
            if error_going_up == 10:
                break  # early stopping: 10 stages in a row without improvement

    # Refit at the selected size so that `gbr` is the model being reported.
    gbr = GradientBoostingRegressor(
        **tuned_params, n_estimators=best_n_estimators, random_state=42
    )
    gbr.fit(X_train, y_train[target])

    y_pred_test = gbr.predict(X_test)
    predictions[target] = y_pred_test
    test_errors[target] = mean_squared_error(y_test[target], y_pred_test)

print(test_errors)

In [None]:
# Build, for every target, a two-column frame pairing the actual test values
# with the model's predictions. The frame takes its index from y_test.
comparison_dfs = {}
for target in target_variables:
    actual_column, predicted_column = f"Actual {target}", f"Predicted {target}"
    comparison_dfs[target] = pd.DataFrame({
        actual_column: y_test[target],
        predicted_column: predictions[target],
    })

# Print the first rows of each comparison frame, in this fixed order.
for shown_target in ('외기온도', '수평면', '모듈온도', '경사면'):
    print(comparison_dfs[shown_target].head())