# In [None]:
import os
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.utils.plotting import plot_series
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

# Load the preprocessed train/test tables; the first column of each CSV is
# dropped (presumably a saved index column -- confirm against the preprocessing step).
log = False
train, test = (
    pd.read_csv(csv_path).iloc[:, 1:]
    for csv_path in ('./data/train_preprocessed.csv', './data/test_preprocessed.csv')
)
# pop_feat is defined outside this section; its last flag is True for the
# training table and False for the test table.
train = pop_feat(train, log, True)
test = pop_feat(test, log, False)

# Define a function to fit AutoGluon model
def fit_autogluon(train, test, num, path):
    """Fit an AutoGluon tabular model for one building and score a hold-out split.

    Args:
        train: Preprocessed training frame; forwarded to ``train_test_split``.
        test: Preprocessed test frame. Not used by this function; kept in the
            signature so existing callers keep working.
        num: Building number. Forwarded to ``train_test_split`` and used as
            the plot title and the saved image's file name.
        path: Output directory; the plot is written to ``<path>/results/<num>``.

    Returns:
        Tuple ``(best_model_type, smape_val, r2_val, y_valid, y_pred)``.
    """
    # NOTE(review): train_test_split and SMAPE are helpers defined outside
    # this section. The 168 is presumably the hold-out length (one week of
    # hourly rows) -- confirm against the helper.
    y_train, y_valid, x_train, x_valid = train_test_split(train, num, 168)

    # Attach the target exactly once, on a copy. The previous code assigned
    # x_train['power'] and then concatenated y_train again, which produced
    # either a duplicated 'power' column or a leaked copy of the target under
    # another name (absent from x_valid at predict time). Copying also avoids
    # mutating the frame returned by the split helper.
    train_data = x_train.copy()
    train_data['power'] = y_train

    # Initialize and fit AutoGluon predictor
    predictor = TabularPredictor(label='power')
    predictor.fit(train_data)

    # Predict on validation set
    y_pred = predictor.predict(x_valid)

    # Plot predictions
    fig, _ = plot_series(y_train, y_valid, pd.Series(y_pred), markers=[',', ',', ','])
    plt.title(num)
    os.makedirs(os.path.join(path, 'results'), exist_ok=True)
    plt.savefig(os.path.join(path, 'results', str(num)))
    plt.show()
    # Release the figure: this function is called once per building, and
    # unclosed figures would otherwise accumulate in memory.
    plt.close(fig)

    # Evaluate performance
    smape_val = SMAPE(y_valid, y_pred)
    r2_val = r2_score(y_valid, y_pred)
    # Fixed placeholder label: the predictor is not queried for its best model here.
    best_model_type = 'AutoGluon'
    print('Best model: {}'.format(best_model_type))
    print('SMAPE: {}'.format(smape_val))
    return best_model_type, smape_val, r2_val, y_valid, y_pred

# Output directory for plots and the summary CSV
save_path = 'exp/autogluon_results'
os.makedirs(save_path, exist_ok=True)

# Per-building accumulators
summary_list = []
ans_val_list = []
pred_val_list = []

# Fit one model per building; building numbers run from 1 to 100.
for building_no in tqdm(range(1, 101)):
    model_name, building_smape, building_r2, y_true, y_hat = fit_autogluon(
        train, test, building_no, save_path
    )

    summary_list.append(
        {
            '건물번호': building_no,
            'Best_model': model_name,
            'SMAPE_val': building_smape,
            'R2_val': building_r2,
        }
    )
    ans_val_list.append(y_true)
    pred_val_list.append(y_hat)

# Pool every building's validation targets/predictions and score them together;
# the pooled SMAPE is embedded in the summary file name.
ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
smape_val = SMAPE(ans_val_whole, pred_val_whole)
summary_file = os.path.join(save_path, 'summary_log_{}.csv'.format(smape_val))
pd.DataFrame(summary_list).to_csv(summary_file, index=False)
