In [8]:
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
import random
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings(action='ignore')

from tqdm import tqdm

In [9]:
train_df = pd.read_csv('./train_preprocessed2.csv')
test_df = pd.read_csv('./test_preprocessed.csv')
submission = pd.read_csv('./sample_submission.csv')
train_df.drop(columns = ['Unnamed: 0'], inplace=True)
train_df.drop(columns = ['holiday'], inplace=True)
test_df.drop(columns = ['Unnamed: 0'], inplace=True)

In [10]:
train_data = TimeSeriesDataFrame.from_data_frame(
    train_df,
    id_column="num",
    timestamp_column="date_time"
)
test_data = TimeSeriesDataFrame.from_data_frame(
    test_df,
    id_column="num",
    timestamp_column="date_time"
)

In [11]:
predictor = TimeSeriesPredictor(
    prediction_length=168,
    path="autogluon-m4-hourly",
    target="power",
    eval_metric="sMAPE",
    known_covariates_names=['temp', 'prec', 'wind', 'hum', 'day', 'month', 'week', 'sin_time', 'cos_time', 'THI', 'CDH']
)



In [12]:
####################################################################################################################################################################
# fast_training: 'Naive', 'SeasonalNaive', 'Theta', 'ETS', 'RecursiveTabular', 'WeightedEnsemble'
# medium_quality: 'Naive', 'SeasonalNaive', 'Theta', 'AutoETS', 'RecursiveTabular', 'DeepAR', 'WeightedEnsemble'
# high_quality: 'Naive', 'SeasonalNaive', 'Theta', 'AutoETS', 'RecursiveTabular', 'DeepAR', 'TemporalFusionTransformer', 'PatchTST', 'DirectTabular', 'AutoARIMA'
# best_quality: 
####################################################################################################################################################################

predictor.fit(
    train_data,
    presets="best_quality",
    excluded_model_types=["DirectTabular"]
)

TimeSeriesPredictor.fit() called
Setting presets to: best_quality
Fitting with arguments:
{'enable_ensemble': True,
 'evaluation_metric': 'sMAPE',
 'excluded_model_types': ['DirectTabular'],
 'hyperparameter_tune_kwargs': {'num_trials': 3,
                                'scheduler': 'local',
                                'searcher': 'auto'},
 'hyperparameters': 'best_quality',
 'num_val_windows': 1,
 'prediction_length': 168,
 'random_seed': None,
 'target': 'power',
 'time_limit': None,
 'verbosity': 2}
Provided training data set with 204000 rows, 100 items (item = single time series). Average time series length is 2040.0. Data frequency is 'H'.
AutoGluon will save models to autogluon-m4-hourly\
AutoGluon will gauge predictive performance using evaluation metric: 'sMAPE'
	This metric's sign has been flipped to adhere to being 'higher is better'. The reported score can be multiplied by -1 to get the metric value.

Provided dataset contains following columns:
	target:           'powe


Starting training. Start time is 2023-08-19 15:24:39
Excluded model types: ['DirectTabular']
	Found 'DirectTabular' model in hyperparameters, but 'DirectTabular' is present in `excluded_model_types` and will be removed.
Models that will be trained: ['Naive', 'SeasonalNaive', 'Theta', 'AutoETS', 'RecursiveTabular', 'DeepAR', 'TemporalFusionTransformer', 'PatchTST', 'AutoARIMA']
Hyperparameter tuning model: Naive. 
	-0.3399       = Validation score (-sMAPE)
	0.28    s     = Training runtime
	9.07    s     = Validation (prediction) runtime
Hyperparameter tuning model: SeasonalNaive. 
	-0.1015       = Validation score (-sMAPE)
	0.26    s     = Training runtime
	0.28    s     = Validation (prediction) runtime
Hyperparameter tuning model: Theta. 
	-0.1197       = Validation score (-sMAPE)
	0.32    s     = Training runtime
	110.85  s     = Validation (prediction) runtime
Hyperparameter tuning model: AutoETS. 
	-0.2072       = Validation score (-sMAPE)
	0.32    s     = Training runtime
	133.0

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x1b598bac850>

In [13]:
predictor.leaderboard()

                        model  score_val  pred_time_val  fit_time_marginal  fit_order
0            WeightedEnsemble  -0.062648     110.239136          15.945325         12
1   TemporalFusionTransformer  -0.067741       1.119034        8280.009601          9
2                   DeepAR\T3  -0.074569      48.422545       10582.007711          8
3                   DeepAR\T1  -0.079891      27.952863        4612.706381          6
4                   DeepAR\T2  -0.083745      32.744695        5541.331219          7
5                    PatchTST  -0.097123       0.940868         269.437189         10
6               SeasonalNaive  -0.101513       0.276913           0.264542          2
7                   AutoARIMA  -0.104492     336.610016           0.206596         11
8            RecursiveTabular  -0.106570      18.633645         437.363177          5
9                       Theta  -0.119660     110.845474           0.322620          3
10                    AutoETS  -0.207213     133.04756

Unnamed: 0,model,score_val,pred_time_val,fit_time_marginal,fit_order
0,WeightedEnsemble,-0.062648,110.239136,15.945325,12
1,TemporalFusionTransformer,-0.067741,1.119034,8280.009601,9
2,DeepAR\T3,-0.074569,48.422545,10582.007711,8
3,DeepAR\T1,-0.079891,27.952863,4612.706381,6
4,DeepAR\T2,-0.083745,32.744695,5541.331219,7
5,PatchTST,-0.097123,0.940868,269.437189,10
6,SeasonalNaive,-0.101513,0.276913,0.264542,2
7,AutoARIMA,-0.104492,336.610016,0.206596,11
8,RecursiveTabular,-0.10657,18.633645,437.363177,5
9,Theta,-0.11966,110.845474,0.32262,3


In [14]:
prediction = predictor.predict(train_data, known_covariates=test_data)
pre = prediction['mean'].to_list()
submission['answer'] = pre
submission.to_csv('./baseline_submission_hyun.csv', index=False)

Global seed set to 123
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble
