In [1]:
# Silence library warnings before the heavier imports below run.
import warnings, torch
warnings.filterwarnings("ignore")

# Seed once, up front, so later cells are reproducible.
import pytorch_lightning as pl
pl.seed_everything(123)

from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, DeepAR, RecurrentNetwork, GroupNormalizer
from pytorch_forecasting.metrics import MAPE, NormalDistributionLoss, QuantileLoss, SMAPE

from math import floor
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression

# Project-local helpers (model wrappers, data splitting/evaluation, preprocessing).
from AnalyzeTools.models import autoregressive_integrated_moving_average, linear_regression, support_vector_regression, random_forest, gradient_boosting
from AnalyzeTools.prepare import data_split, model_eval, pathForSavingModels, retriveBestModelPath
from AnalyzeTools.preprocess import preprocessData
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Root directory for model artefacts; not referenced by the cells visible here.
params_path = './Models'

  from .autonotebook import tqdm as notebook_tqdm
Global seed set to 123


In [2]:
# Read data
# Identifiers passed to pathForSavingModels when locating the model directory.
raw_file_name = '경략가격집계 - 소,돼지'
product = "pork-hot"
product_attribute = "경락가격"

# Prediction target column (alternative: MIN_COST_AMT).
_output = 'MAX_COST_AMT' # MIN_COST_AMT
default_exclude_cols = ['JUDGE_GUBN', 'JUDGE_BREED', 'JUDGE_SEX', 'SABLE_GUBN', 'ABATT_CODE']

# Load the raw CSV, keep JUDGE_KIND == 2 rows, drop the excluded columns and
# average the remaining columns per STD_DATE.
df = (
    pd.read_csv('../Data/beef/경략가격집계 - 소,돼지.csv', encoding='euc_kr', engine='python')
    .query("JUDGE_KIND == 2")
    .drop(columns=default_exclude_cols)
    .groupby(['STD_DATE'])
    .mean()
    .reset_index()
)

# Prefix "20" and swap "/" for "-" in the date strings
# (presumably 'YY/MM/DD' -> '20YY-MM-DD'; verify against the raw file).
df['STD_DATE'] = df['STD_DATE'].apply(lambda date_str: "20" + date_str.replace("/", "-"))

# preprocessData returns the processed frame plus a second value used later as
# the list of input feature names (presumably the "TOP K features" it prints).
df, _input = preprocessData(df, 'STD_DATE', _output)


Feature scores:
                 Features        Scores
6  DEFECT_MAX_COST_AMT  6.811574e+10
1         MIN_COST_AMT  1.222592e+04
5  DEFECT_MIN_COST_AMT  1.222443e+04
2         SUM_COST_AMT  4.538033e+02
7  DEFECT_SUM_COST_AMT  4.537454e+02
8    DEFECT_SUM_WEIGHT  2.279391e+01
3           SUM_WEIGHT  2.277102e+01
4           DEFECT_CNT  1.084715e+01
0                  CNT  1.083094e+01

TOP K features:
   ['MIN_COST_AMT', 'SUM_COST_AMT', 'DEFECT_MIN_COST_AMT', 'DEFECT_MAX_COST_AMT']


In [11]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

def TFT(
    data,
    training_cutoff,
    target,
    group,
    max_encoder_length,
    max_prediction_length,
    time_varying_known_categoricals,
    time_varying_unknown_categoricals,
    time_varying_known_reals,
    time_varying_unknown_reals,
    batch_size,
    saving_dir,
):
    """Build TFT train/validation datasets and run a hyperparameter search.

    Parameters
    ----------
    data : DataFrame with a ``time_idx`` column plus every column named in the
        remaining arguments.
    training_cutoff : largest ``time_idx`` value that belongs to the training set.
    target : name of the column to forecast.
    group : list of column names identifying a time series (``group_ids``).
    max_encoder_length, max_prediction_length : window sizes forwarded to
        ``TimeSeriesDataSet``.
    time_varying_known_categoricals, time_varying_unknown_categoricals,
    time_varying_known_reals, time_varying_unknown_reals : column-name lists
        forwarded to ``TimeSeriesDataSet`` under the same keyword names.
    batch_size : batch size for both dataloaders.
    saving_dir : directory used as the trainer's ``default_root_dir`` and as
        ``model_path`` for the hyperparameter search.

    Returns
    -------
    (trainer, study, val_dataloader)
        ``trainer`` is configured here but never fitted inside this function;
        ``study`` is whatever ``optimize_hyperparameters`` returns (an Optuna
        study according to the pytorch-forecasting docs); ``val_dataloader``
        iterates the validation dataset without shuffling (``train=False``).
    """
    # NOTE(review): the result is not used below. The call is kept because the
    # helper is defined elsewhere and may have side effects -- confirm and drop.
    best_model_path = retriveBestModelPath(saving_dir)

    training = TimeSeriesDataSet(
        data[lambda x: x.time_idx <= training_cutoff],
        time_idx="time_idx",
        target=target,
        group_ids=group,
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        time_varying_known_categoricals=time_varying_known_categoricals,
        time_varying_unknown_categoricals=time_varying_unknown_categoricals,
        time_varying_known_reals=time_varying_known_reals,
        time_varying_unknown_reals=time_varying_unknown_reals,
    )

    # Start validation predictions strictly after the training cutoff so that no
    # validation target was also a training target. The previous boundary,
    # `training.index.time.max() + 1`, appears to be derived from sequence *start*
    # indices and therefore lies before the cutoff, overlapping the training
    # window -- NOTE(review): confirm against the installed pytorch-forecasting.
    validation = TimeSeriesDataSet.from_dataset(
        training,
        data,
        min_prediction_idx=training_cutoff + 1,
        stop_randomization=True,
    )

    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    early_stop_callback = EarlyStopping(monitor="val_loss", verbose=False, mode="min")
    lr_logger = LearningRateMonitor()  # log the learning rate

    # This trainer is only constructed and returned; the search below is driven
    # by optimize_hyperparameters with its own `max_epochs` / `trainer_kwargs`.
    trainer = pl.Trainer(
        max_epochs=100,
        gpus=0,
        weights_summary="top",
        callbacks=[lr_logger, early_stop_callback],
        log_every_n_steps=10,
        default_root_dir=saving_dir,
    )

    # Only the hidden-size range is overridden; the other search ranges
    # (gradient clipping, hidden continuous size, attention heads, learning
    # rate, dropout) are not passed and so use the library defaults.
    study = optimize_hyperparameters(
        train_dataloader,
        val_dataloader,
        model_path=saving_dir,
        n_trials=3,
        max_epochs=5,
        hidden_size_range=(8, 32),
        trainer_kwargs=dict(limit_train_batches=30),
        reduce_on_plateau_patience=4,
        use_learning_rate_finder=False,
    )

    return trainer, study, val_dataloader

In [12]:
# Work on a copy of the preprocessed frame: add a consecutive integer time index
# and a constant group label so the whole frame is treated as one series.
data = df.assign(time_idx=range(len(df)), group=product)

# The first 80% of the rows (by time_idx) are used for training.
training_cutoff = floor(0.8 * len(data))

# Window sizes and batch size for the TFT datasets.
max_prediction_length = 1
max_encoder_length = 30 # 7, 14, 30, 60, 120
batch_size = 64

# Column roles handed to TimeSeriesDataSet.
group = ['group']
time_varying_known_categoricals = ['month', 'week']
time_varying_unknown_categoricals = []
time_varying_known_reals = ['time_idx']
time_varying_unknown_reals = _input + [_output]

# Cast the known categorical columns to string-backed pandas categoricals.
data = data.assign(**{
    name: data[name].astype(str).astype("category")
    for name in time_varying_known_categoricals
})

In [13]:
# Run the TFT hyperparameter search.
# Arguments are passed by keyword: the earlier positional call supplied the
# unknown and known categorical lists in the opposite order to TFT's signature,
# so ['month', 'week'] was treated as unknown and [] as known.
trainer, study, val_dataloader = TFT(
    data=data,
    training_cutoff=training_cutoff,
    target=_output,
    group=group,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_categoricals=time_varying_known_categoricals,
    time_varying_unknown_categoricals=time_varying_unknown_categoricals,
    time_varying_known_reals=time_varying_known_reals,
    time_varying_unknown_reals=time_varying_unknown_reals,
    batch_size=batch_size,
    saving_dir=pathForSavingModels(product, product_attribute, raw_file_name, 'TFT'),
)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


TypeError: optimize_hyperparameters() missing 1 required positional argument: 'model_path'

In [None]:
# Display the best hyperparameters recorded on the `study` object returned by TFT(...).
study.best_params

{'gradient_clip_val': 0.38428003216841616,
 'hidden_size': 15,
 'dropout': 0.25369474370115025,
 'hidden_continuous_size': 8,
 'attention_head_size': 4,
 'learning_rate': 0.01317899518137067}

In [14]:
# Resolve the checkpoint path explicitly in this cell. `best_model_path` was
# previously only a local variable inside TFT(), so this cell relied on leftover
# kernel state and would fail (or load the wrong path) on a fresh run.
tft_saving_dir = pathForSavingModels(product, product_attribute, raw_file_name, 'TFT')
best_model_path = retriveBestModelPath(tft_saving_dir)

# NOTE(review): assumes the helper returns a falsy value when no checkpoint
# exists -- confirm against AnalyzeTools.prepare.
if not best_model_path:
    raise FileNotFoundError(f"No TFT checkpoint found under {tft_saving_dir!r}")

tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

PermissionError: [Errno 13] Permission denied: 'd:/Agriculture prediction/Notebooks'

In [None]:
# Evaluate the loaded TFT model on the validation dataloader.
# Each batch is an (x, y) pair; the first element of y is collected and the
# per-batch tensors are concatenated into one tensor of actual values.
target_batches = [y[0] for _, y in val_dataloader]
actuals = torch.cat(target_batches)
predictions = tft.predict(val_dataloader)

# Project helper: prints metrics (stdout=True) and plots (vis=True).
model_eval(actuals, predictions, stdout=True, vis=True)