In [None]:
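# One-time environment setup: uncomment and run once, then restart the kernel.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# %pip install -U pip
# %pip install -U setuptools wheel
# %pip install autogluon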

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

In [None]:
# Columns handed to the predictor as `known_covariates_names`. The same columns
# are selected from the test CSV and supplied as `known_covariates` when
# predicting, so every name listed here must exist in both CSV files.
future_covariates = [
    'temp', 'prec', 'wind', 'hum', 'isolation',
    'sunshine', 'use', 'area_1', 'area_2', 'hour', 'month', 'holiday',
    'sin_hour', 'cos_hour', 'sin_date', 'cos_date',
    'sin_weekday', 'cos_weekday', 'summer_sin', 'summer_cos', 'temp_F',
    'temp2', 'THI', 'WC', 'is_rain', 'log_temp', 'wind_power', 'dew_point',
    'solar_per_hour', 'CDH',
    'target_mean_1', 'target_std_1', 'target_min_1', 'target_max_1', 'z_score',
    'min_temp', 'max_temp', 'min_wind', 'max_wind',
    'min_hum', 'max_hum', 'mean_THI', 'mean_CDH', 'min_log_temp', 'max_log_temp', 'mean_WC'
]

# Columns cast to pandas `category`; category-typed columns are not selected by
# the numeric `select_dtypes` below and are therefore left out of the clipping.
cat_features = ['holiday', 'use']

# Extra columns to remove from the training frame.
# NOTE(review): `drop_col` used to be referenced without being defined anywhere
# in this notebook, which raises NameError on a fresh kernel. It now defaults to
# "drop nothing", which also keeps the training frame's columns identical to the
# history frame built for prediction. List column names here if some features
# were meant to be excluded.
drop_col = []

train_df = pd.read_csv('./data/train_p_final.csv')

# Log-transformed copy of the target, kept as an additional column. It is not
# listed in `future_covariates`, so it is not declared as a known covariate.
# NOTE(review): np.log returns -inf where power == 0; the clipping below turns
# that into -1e6. Confirm this is intended (np.log1p may be a better fit).
train_df["log_power"] = np.log(train_df["power"])
train_df = train_df.astype({col: 'category' for col in cat_features})

# Bound every numeric column to [-1e6, 1e6] so infinite or extreme values do
# not reach the models.
num_cols = train_df.select_dtypes(include=["number"]).columns
train_df[num_cols] = train_df[num_cols].clip(lower=-1e6, upper=1e6)

# Rename to the `item_id` / `timestamp` column names that TimeSeriesDataFrame
# expects, then discard the submission key and any explicitly excluded columns.
train_df = (
    train_df
    .rename(columns={'build_num': 'item_id', 'date_time': 'timestamp'})
    .drop(columns=['num_date_time'])
    .drop(columns=drop_col)
)

data = TimeSeriesDataFrame(train_df)
predictor = TimeSeriesPredictor(
    prediction_length=7*24,  # forecast horizon in time steps (7 days if the data is hourly — TODO confirm)
    target="power",
    eval_metric="SMAPE",
    known_covariates_names=future_covariates,
)
# Fix the random seed for reproducibility.
# NOTE(review): num_val_windows=7*24 requests 168 backtest windows, each one
# prediction_length (168) steps long. That value looks like it may have been
# copied from the horizon; confirm the intended number of validation windows.
predictor.fit(data, random_seed=42, presets='best_quality', num_val_windows=7*24)
# Refit the trained models; the resulting copies carry the `_FULL` suffix that
# the prediction cell asks for by name.
predictor.refit_full()

In [None]:
# Refit (`_FULL`) models for which a separate submission file is written.
model_list = [
    'ChronosFineTuned[bolt_small]_FULL',
    'ChronosZeroShot[bolt_base]_FULL',
    'DirectTabular_FULL',
    'TiDE_FULL',
    'RecursiveTabular_FULL',
    'NPTS_FULL',
    'SeasonalNaive_FULL',
    'DynamicOptimizedTheta_FULL',
    'PatchTST_FULL',
    'AutoETS_FULL',
    'TemporalFusionTransformer_FULL',
    'DeepAR_FULL',
]

cat_features = ['holiday', 'use']


def _load_frame(csv_path, add_log_power=False):
    """Read a preprocessed CSV and apply the cleaning steps used for training.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read. It must contain the `build_num`,
        `date_time`, `num_date_time`, `holiday` and `use` columns.
    add_log_power : bool, default False
        When True, add a `log_power` column computed as ``np.log(power)``;
        only valid for files that contain the `power` column.

    Returns
    -------
    pandas.DataFrame
        Frame with categorical `holiday` / `use`, numeric columns clipped to
        [-1e6, 1e6], `build_num` / `date_time` renamed to `item_id` /
        `timestamp`, and `num_date_time` removed.
    """
    frame = pd.read_csv(csv_path)
    if add_log_power:
        frame["log_power"] = np.log(frame["power"])
    frame = frame.astype({col: 'category' for col in cat_features})
    numeric = frame.select_dtypes(include=["number"]).columns
    frame[numeric] = frame[numeric].clip(lower=-1e6, upper=1e6)
    return (
        frame
        .rename(columns={'build_num': 'item_id', 'date_time': 'timestamp'})
        .drop(columns=['num_date_time'])
    )


# History (includes the observed `power` values), converted to a TimeSeriesDataFrame.
history_df = _load_frame('./data/train_p_final.csv', add_log_power=True)
history_tsd = TimeSeriesDataFrame(history_df)

# Future values of the known covariates over the forecast horizon.
test_df = _load_frame('./data/test_p_final.csv')
future_cov_tsd = TimeSeriesDataFrame(test_df[['item_id', 'timestamp'] + future_covariates])

# Read the submission template once instead of on every loop iteration.
sample_submission = pd.read_csv('./sample_submission.csv')

# Create the output directory up front so `to_csv` cannot fail on a missing folder.
submission_dir = Path('./submission')
submission_dir.mkdir(parents=True, exist_ok=True)

for model in model_list:
    # Forecast with one specific refit model.
    pred = predictor.predict(
        data=history_tsd,            # history that includes past `power`
        known_covariates=future_cov_tsd,
        model=model,
    )
    answers = pred.reset_index()['mean'].to_numpy()
    # Fail loudly on a row-count mismatch; an index-aligned Series assignment
    # would silently fill NaN or truncate instead.
    if len(answers) != len(sample_submission):
        raise ValueError(
            f"{model}: got {len(answers)} predictions but the submission "
            f"template has {len(sample_submission)} rows"
        )
    # Values are written positionally.
    # NOTE(review): assumes the template rows follow the same (item_id, timestamp)
    # order as the predictions — confirm against sample_submission.csv.
    submission = sample_submission.copy()
    submission['answer'] = answers
    submission.to_csv(submission_dir / f'ag_ts_{model}.csv', index=False)