In [5]:
# Train every individual model on each target variable and average the predictions.
def train_and_predict_individual_models(models, X_train, y_train, X_test):
    """Fit each model per target column and return the averaged predictions.

    For every target column, each estimator in ``models`` is fitted on
    ``X_train`` against that single column and asked to predict ``X_test``;
    the per-model predictions are then averaged with equal weights.

    Args:
        models: Iterable of estimator objects exposing ``fit(X, y)`` and
            ``predict(X)``. The estimators are refit in place for every
            target, so on return each one holds the fit for the last target.
        X_train: Training features, passed unchanged to ``fit``.
        y_train: Training targets. Either a pandas DataFrame/Series, or an
            array-like that is 1-D (single target) or 2-D with one column
            per target.
        X_test: Test features; must expose ``shape[0]`` (number of samples).

    Returns:
        A 2-D ``numpy.ndarray`` of shape ``(n_test_samples, n_targets)`` whose
        column ``j`` is the mean prediction of all models for target ``j``.

    Raises:
        ValueError: If ``models`` is empty (the average would be undefined).
    """
    import numpy as np

    models = list(models)
    if not models:
        raise ValueError("models must contain at least one estimator")

    # Keep pandas targets as pandas objects so estimators receive exactly what
    # the caller supplied; anything else is normalised to an ndarray.
    is_pandas = hasattr(y_train, "iloc")
    targets = y_train if is_pandas else np.asarray(y_train)

    if targets.ndim == 1:
        # A single target vector is treated as one column.
        target_columns = [targets]
    elif is_pandas:
        target_columns = [targets.iloc[:, j] for j in range(targets.shape[1])]
    else:
        target_columns = [targets[:, j] for j in range(targets.shape[1])]

    n_test_samples = X_test.shape[0]
    predictions = []
    for target in target_columns:
        summed = np.zeros(n_test_samples)
        for model in models:
            model.fit(X_train, target)
            summed += model.predict(X_test)
        predictions.append(summed / len(models))
    return np.column_stack(predictions)

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the merged dataset.
file_path = "./merged_result.csv"
data = pd.read_csv(file_path)

# Select the feature columns and the target columns.
features = ['강수확률', '일최저기온', '1시간기온', '하늘상태', '일최고기온', '습도', '풍향', '1시간강수량', '풍속']
target_vars = ['수평면', '외기온도', '경사면', '모듈온도']
X = data[features]
y = data[target_vars]

# One-hot encode the categorical '하늘상태' feature, dropping the first category.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old name, so try the new spelling first and fall back for
# older installations.
try:
    encoder = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(drop='first', sparse=False)
encoded_features = encoder.fit_transform(X[['하늘상태']])
# Reuse X's index so the column-wise concat below aligns row-for-row even if
# the frame does not carry a default RangeIndex.
encoded_df = pd.DataFrame(
    encoded_features,
    columns=encoder.get_feature_names_out(['하늘상태']),
    index=X.index,
)
X_encoded = pd.concat([X.drop(columns=['하늘상태']), encoded_df], axis=1)

# Split into training and test sets (80% / 20%, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Define the individual models whose predictions are averaged.
models = [
    RandomForestRegressor(random_state=42),
    GradientBoostingRegressor(random_state=42),
    SVR(kernel='linear')
]

ensemble_predictions_individual = train_and_predict_individual_models(models, X_train, y_train, X_test)

# Evaluate performance. With multi-column targets these metrics use
# scikit-learn's default uniform average across the target columns.
ensemble_mse_individual = mean_squared_error(y_test, ensemble_predictions_individual)
ensemble_mae_individual = mean_absolute_error(y_test, ensemble_predictions_individual)



In [7]:
# Pick one sample from the test set by its positional index.
sample_index = 5

# Ensemble prediction for that sample (one value per target variable).
sample_ensemble_prediction_individual = ensemble_predictions_individual[sample_index]
# Ground-truth target values for the same sample.
sample_actual_value_individual = y_test.iloc[sample_index].values

# Display the overall metrics together with the sample's predicted and actual values.
(
    ensemble_mse_individual,
    ensemble_mae_individual,
    sample_ensemble_prediction_individual,
    sample_actual_value_individual,
)

(15563.985340694793,
 55.90053576223588,
 array([581.21729013,  36.12380304, 577.03424936,  43.58063132]),
 array([495. ,  36.2, 546. ,  42.1]))