In [1]:

import torch, json
from math import floor
import pandas as pd
import numpy as np

from FileManager.dataManager import dataManager

from AnalyzeTools.models import autoregressive_integrated_moving_average, linear_regression, support_vector_regression, random_forest, gradient_boosting
from AnalyzeTools.prepare import data_split, model_eval, pathForSavingModels
from AnalyzeTools.preprocess import preprocessData
from AnalyzeTools.superModels import DEEPAR, TFT, RNN

params_path = './Models/single'
train_size = 0.8
product_object = json.load(open("./File information.json", "r", encoding='utf8'))

all_experiments= []
for product in product_object.keys():
    for raw_file_name in  product_object[product].keys():
        for product_type in product_object[product][raw_file_name]['product_types']:
            for target in product_object[product][raw_file_name]['targets']:
                all_experiments.append([product, raw_file_name, product_type, target])
i = 2
experiment = all_experiments[2]
print("*"*50 + f"  {i}/{len(all_experiments)}  "  + "*"*50)
product, raw_file_name, product_type, target = experiment
df, product_and_product_type, product_attribute = dataManager(raw_file_name, product, product_type, target)

if len(df) == 0:
    raise ValueError("No data!")

df, input_features = preprocessData(df, 'date', target)
predictions_x_axis = df['date'][floor(len(df)*train_size):].values


  from .autonotebook import tqdm as notebook_tqdm
Global seed set to 123


**************************************************  2/63  **************************************************
There are too few features in the data. The raw data features will be used.
Too few features to filter!

-->Final features:
  []


In [2]:
input_features

[]

In [3]:

# prepare dataset for statistics and Macnhine models
ml_split_params = {'Model': 'ML', 'Future': 1}
X_train, X_test, y_train, y_test, input_scaler, output_scaler = data_split(df, input_features, output=target, train_size=train_size, scaling=True, **ml_split_params)

''' Input data into models and Evaluate model results '''
ml_searchCV_params = {
    'base_dir': params_path,
    'product': product_and_product_type,
    'attribute': product_attribute,
    'raw': raw_file_name,
    'save': True
}
# stdout = False
# vis = False

# print("\nARIMA")
# arima_predictions = autoregressive_integrated_moving_average(y_train, y_test)
# model_eval(y_test, arima_predictions, predictions_x_axis, stdout=stdout, vis=vis, **{'scaler': output_scaler})

print("\nLinear Regression")
lr, _ = linear_regression(X_train, y_train)
# lr_predictions = lr.predict(X_test)
# model_eval(y_test, lr_predictions, predictions_x_axis, stdout=stdout, vis=vis, **{'scaler': output_scaler})

print("\nSupport Vector Regression")
svr, _ = support_vector_regression(X_train, y_train, search=True, **ml_searchCV_params)
# svr_predictions = svr.predict(X_test)
# model_eval(y_test, svr_predictions, predictions_x_axis, stdout=stdout, vis=vis, **{'scaler': output_scaler})

print("\nRandom Forest")
rf, _ = random_forest(X_train, y_train, search=True, **ml_searchCV_params, **{'scaler': output_scaler})
# rf_predictions = rf.predict(X_test)
# model_eval(y_test, rf_predictions, predictions_x_axis, stdout=stdout, vis=vis, **{'scaler': output_scaler})

print("\nGradient Boosting")
gb, _ = gradient_boosting(X_train, y_train, search=True, **ml_searchCV_params)
# gb_predictions = gb.predict(X_test)
# model_eval(y_test, gb_predictions, predictions_x_axis, stdout=stdout, vis=vis, **{'scaler': output_scaler})



[32m[I 2022-11-26 22:58:38,068][0m A new study created in memory with name: no-name-c993b3b0-3d3e-42f2-9afa-343888ba0814[0m


X_train: (997, 1) y_train: (997,) X_test: (250, 1) y_test: (250,)

Linear Regression

Support Vector Regression
--> Start searching best parameters!

Best parameter for SVR is:
  {'C': 1, 'gamma': 0.01, 'epsilon': 0.05, 'kernel': 'rbf'}

Random Forest
--> Start searching best parameters!

Best parameter for Random forest is:
  {'n_estimators': 270, 'max_depth': 1, 'max_features': 1, 'min_samples_leaf': 9, 'min_samples_split': 5}

Gradient Boosting
--> Start searching best parameters!

Best parameter for Gradient Boosting is:
  {'n_estimators': 110, 'max_depth': 1, 'max_features': 1, 'min_samples_leaf': 3, 'min_samples_split': 3}


In [4]:
data = df.copy()

data['time_idx'] = range(len(data))
data['group'] = product

training_cutoff = floor(len(data) * train_size)

max_prediction_length = 1
max_encoder_length = 30 # 7, 14, 30, 60, 120
batch_size = 64

group = ['group']
time_varying_known_categoricals = ['month', 'week']
time_varying_unknown_categoricals = []
time_varying_known_reals = ['time_idx']
time_varying_unknown_reals = input_features + [target]

In [5]:
# import warnings, torch, shutil
# warnings.filterwarnings("ignore")

# import pytorch_lightning as pl
# pl.seed_everything(123)

# from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
# from pytorch_lightning.loggers import TensorBoardLogger

# from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, DeepAR, RecurrentNetwork, GroupNormalizer
# from pytorch_forecasting.metrics import MAPE, NormalDistributionLoss, QuantileLoss, SMAPE

# from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
# from AnalyzeTools.prepare import retriveBestModelPath

# def RNN(
#     data, 
#     training_cutoff, 
#     target, 
#     group, 
#     max_encoder_length, 
#     max_prediction_length, 
#     time_varying_known_categoricals,
#     time_varying_unknown_categoricals, 
#     time_varying_known_reals, 
#     batch_size,
#     saving_dir,
#     cell='LSTM'
# ):
#     best_model_path = retriveBestModelPath(saving_dir)

#     data[time_varying_known_categoricals] = data[time_varying_known_categoricals].astype(str).astype("category")
#     training = TimeSeriesDataSet(
#         data[lambda x: x.time_idx <= training_cutoff],
#         time_idx="time_idx",
#         target=target,
#         group_ids=group,
#         max_encoder_length=max_encoder_length,
#         max_prediction_length=max_prediction_length,
#         time_varying_known_categoricals=time_varying_known_categoricals,
#         time_varying_unknown_categoricals=time_varying_unknown_categoricals,
#         time_varying_known_reals=time_varying_known_reals,
#         time_varying_unknown_reals=[target],
#     )

#     validation = TimeSeriesDataSet.from_dataset(
#         training, 
#         data, 
#         min_prediction_idx=training.index.time.max() + 1 + max_encoder_length - 1,
#         stop_randomization=True
#     )

#     train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
#     val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

#     print(best_model_path)
#     if not best_model_path:
#         early_stop_callback = EarlyStopping(monitor="val_loss", verbose=False, mode="min")
#         lr_logger = LearningRateMonitor()

#         trainer = pl.Trainer(
#             max_epochs=100,
#             gpus=0,
#             weights_summary='top',
#             callbacks=[lr_logger, early_stop_callback],
#             log_every_n_steps=10,
#             check_val_every_n_epoch=3,
#             default_root_dir=saving_dir,
#         )

#         model = RecurrentNetwork.from_dataset(
#             training,
#             cell_type=cell,
#             hidden_size=128,
#             rnn_layers=1,
#             dropout=0.1,
#             output_size=1,
#             loss=MAPE(),
#             log_interval=10
#         )

#         trainer.fit(
#             model,
#             train_dataloaders=train_dataloader,
#             val_dataloaders=val_dataloader
#         )
    
#     best_model_path = retriveBestModelPath(saving_dir)
#     best_model = RecurrentNetwork.load_from_checkpoint(best_model_path, cell_type=cell)

#     return best_model, val_dataloader

In [7]:
pathForSavingModels(product_and_product_type, product_attribute, raw_file_name, 'LSTM')

'd:\\Agriculture prediction\\Notebooks/Models/single/pork(CTSED_CODE=4301)/소매가격/(중)축산유통정보 - 소비자가격/LSTM'

In [18]:
print("\nLSTM")
lstm, val_dataloader = RNN(
    data,
    training_cutoff,
    target,
    group,
    max_encoder_length,
    max_prediction_length,
    time_varying_known_categoricals,
    time_varying_unknown_categoricals,
    time_varying_known_reals,
    batch_size,
    pathForSavingModels(product, product_and_product_type, raw_file_name, 'LSTM'),
    'LSTM'
)

# actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
# lstm_predictions = lstm.predict(val_dataloader)
# model_eval(actuals, lstm_predictions, predictions_x_axis, stdout=True, vis=True)

print("\nGRU")
gru, val_dataloader = RNN(
    data,
    training_cutoff,
    target,
    group,
    max_encoder_length,
    max_prediction_length,
    time_varying_known_categoricals,
    time_varying_unknown_categoricals,
    time_varying_known_reals,
    batch_size,
    pathForSavingModels(product, product_and_product_type, raw_file_name, 'GRU'),
    'GRU'
)

# actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
# gru_predictions = gru.predict(val_dataloader)
# model_eval(actuals, gru_predictions, predictions_x_axis, stdout=True, vis=True)

print("\nDeepAR")
deep_ar, val_dataloader = DEEPAR(
    data,
    training_cutoff,
    target,
    group,
    max_encoder_length,
    max_prediction_length,
    time_varying_known_categoricals,
    time_varying_unknown_categoricals,
    time_varying_known_reals,
    batch_size,
    pathForSavingModels(product, product_and_product_type, raw_file_name, 'DEEPAR'),
)

# actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
# deepar_predictions = deep_ar.predict(val_dataloader)

# model_eval(actuals, deepar_predictions, predictions_x_axis, stdout=True, vis=True)

print("\nTFT")
tft, val_dataloader = TFT(
    data,
    training_cutoff,
    target,
    group,
    max_encoder_length,
    max_prediction_length,
    time_varying_unknown_categoricals,
    time_varying_known_categoricals,
    time_varying_known_reals,
    time_varying_unknown_reals,
    batch_size,
    pathForSavingModels(product, product_and_product_type, raw_file_name, 'TFT'),
)

# actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
# tft_predictions = tft.predict(val_dataloader)
# model_eval(actuals, tft_predictions, predictions_x_axis, stdout=True, vis=True)


GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: d:\Agriculture prediction\Notebooks\Models\single\pork\pork(CTSED_CODE=4301)\(중)축산유통정보 - 소비자가격\LSTM\lightning_logs

  | Name             | Type           | Params
----------------------------------------------------
0 | loss             | MAPE           | 0     
1 | logging_metrics  | ModuleList     | 0     
2 | embeddings       | MultiEmbedding | 867   
3 | rnn              | LSTM           | 78.3 K
4 | output_projector | Linear         | 129   
----------------------------------------------------
79.3 K    Trainable params
0         Non-trainable params
79.3 K    Total params
0.317     Total estimated model params size (MB)



LSTM
False
Epoch 14: 100%|██████████| 19/19 [00:02<00:00,  9.23it/s, loss=0.116, v_num=0, train_loss_step=0.105, train_loss_epoch=0.117, val_loss=0.0897] 

GRU


GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


False


Missing logger folder: d:\Agriculture prediction\Notebooks\Models\single\pork\pork(CTSED_CODE=4301)\(중)축산유통정보 - 소비자가격\GRU\lightning_logs

  | Name             | Type           | Params
----------------------------------------------------
0 | loss             | MAPE           | 0     
1 | logging_metrics  | ModuleList     | 0     
2 | embeddings       | MultiEmbedding | 867   
3 | rnn              | GRU            | 58.8 K
4 | output_projector | Linear         | 129   
----------------------------------------------------
59.7 K    Trainable params
0         Non-trainable params
59.7 K    Total params
0.239     Total estimated model params size (MB)


Epoch 14: 100%|██████████| 19/19 [00:01<00:00, 10.76it/s, loss=0.117, v_num=0, train_loss_step=0.122, train_loss_epoch=0.116, val_loss=0.0903] 

DeepAR


GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: d:\Agriculture prediction\Notebooks\Models\single\pork\pork(CTSED_CODE=4301)\(중)축산유통정보 - 소비자가격\DEEPAR\lightning_logs

  | Name                   | Type                   | Params
------------------------------------------------------------------
0 | loss                   | NormalDistributionLoss | 0     
1 | logging_metrics        | ModuleList             | 0     
2 | embeddings             | MultiEmbedding         | 867   
3 | rnn                    | LSTM                   | 211 K 
4 | distribution_projector | Linear                 | 258   
------------------------------------------------------------------
212 K     Trainable params
0         Non-trainable params
212 K     Total params
0.850     Total estimated model params size (MB)


Epoch 18:   0%|          | 0/15 [00:00<?, ?it/s, loss=8.59, v_num=0, train_loss_step=8.590, train_loss_epoch=8.590, val_loss=8.550]         

[32m[I 2022-11-26 22:56:11,311][0m A new study created in memory with name: no-name-a761f040-867d-424c-85c9-25179dde0017[0m



TFT


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
