In [115]:
!pip install "gluonts[torch]"



In [116]:
import pandas as pd
import numpy as np
from gluonts.dataset.pandas import PandasDataset
from gluonts.torch.model.deepar import DeepAREstimator
#from gluonts.torch.distributions import NegativeBinomialOutput
from gluonts.torch.distributions import NormalOutput
import json



In [117]:
def Create_Data_Window(df,N,W,stride,n_series=111):
    '''
    Build W expanding training windows from a long-format frame.

    Window i (0-based) holds the first (N + i*stride) * n_series rows of df,
    re-indexed from 0. The slicing is purely positional, so it assumes df
    holds n_series rows per time step, ordered by time step -- TODO confirm
    against the input file.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows.
    N : int
        Number of time steps in the first window.
    W : int
        Number of windows to create.
    stride : int
        Number of time steps added with each successive window.
    n_series : int, optional
        Rows per time step. Defaults to 111, the value that used to be
        hard-coded in the slice.

    Returns
    -------
    list of pandas.DataFrame
        The W windows, shortest first.
    '''
    return [
        # drop=True discards the old index instead of inserting it as a
        # column that then has to be sliced away again.
        df.iloc[:(N + i*stride)*n_series, :].reset_index(drop=True)
        for i in range(W)
    ]

In [118]:
def Create_testData_Window(df,N,W,stride,n_series=111):
    '''
    Build W consecutive, non-overlapping test windows from a long-format frame.

    Window i (0-based) holds rows (N + i*stride) * n_series up to, but not
    including, (N + (i+1)*stride) * n_series of df, re-indexed from 0. The
    slicing is purely positional, so it assumes df holds n_series rows per
    time step, ordered by time step -- TODO confirm against the input file.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows.
    N : int
        Number of time steps skipped before the first test window.
    W : int
        Number of windows to create.
    stride : int
        Number of time steps in each test window.
    n_series : int, optional
        Rows per time step. Defaults to 111, the value that used to be
        hard-coded in the slice.

    Returns
    -------
    list of pandas.DataFrame
        The W windows in positional order.
    '''
    windows = []
    for i in range(W):
        first_row = (N + i*stride)*n_series
        last_row = (N + (i+1)*stride)*n_series
        # drop=True discards the old index instead of inserting it as a
        # column that then has to be sliced away again.
        windows.append(df.iloc[first_row:last_row, :].reset_index(drop=True))
    return windows

In [119]:
# Split train and test
path = '/kaggle/input/tourism-data/'
df = pd.read_csv(path+'Tourism_process_for_deepar.csv')
prediction_length = 12
freq = 'MS'
N = 168       # N argument of the window helpers (size of the first training window)
stride = 12   # stride argument of the window helpers (growth per window)
W = 4         # number of windows

train_lis = Create_Data_Window(df,N,W,stride)

# Static attributes: one row per distinct (Node, Region, Zone, State)
# combination in the first window, indexed by Node.
train1 = train_lis[0].drop_duplicates(subset=['Node','Region','Zone','State'])
train_static = train1[['State','Zone','Region','Node']].set_index('Node')

# Columns handed to GluonTS, selected by name instead of by position so a
# reordered CSV cannot silently feed the wrong columns. These are exactly the
# columns the from_long_dataframe call below refers to.
train_columns = ['Date','Node','Value','Month_Of_Year']

sp = []                  # per window: {node: {'mean': ..., 'std': ...}}
train_standard_lis = []  # per window: PandasDataset of standardized values
for train_ds in train_lis:
    # Standardize 'Value' separately for every node. The per-node mean/std are
    # kept so forecasts can be mapped back to the original scale later.
    node_values = train_ds.groupby('Node')['Value']
    standardized_params = node_values.agg(['mean','std']).to_dict('index')
    # Whole-column replacement: the input window stays untouched and no float
    # values are written element-wise into the existing column.
    train_standard = train_ds.assign(
        Value=(train_ds['Value']-node_values.transform('mean'))/node_values.transform('std')
    )

    # NOTE(review): the dataset is built with freq='M' while `freq` above is
    # 'MS' -- confirm the two are meant to differ.
    train_standard_ds = PandasDataset.from_long_dataframe(train_standard[train_columns],
                                                target="Value",
                                                timestamp='Date',
                                                freq='M',
                                                item_id="Node",
                                                feat_dynamic_real=["Month_Of_Year"],
                                                static_features=train_static)

    train_standard_lis.append(train_standard_ds)
    sp.append(standardized_params)


In [120]:
# Held-out windows: the rows that come right after each training window.
test_lis = Create_testData_Window(df, N=N, W=W, stride=stride)


In [121]:
# Wrap every held-out window in a GluonTS dataset.
# Columns are selected by name instead of by position so a reordered CSV
# cannot silently feed the wrong columns; these are exactly the columns the
# from_long_dataframe call refers to.
# NOTE(review): 'Value' is passed here on its original scale -- only the
# training windows are standardized. Confirm this is intended.
test_columns = ['Date','Node','Value','Month_Of_Year']
test_ds_lis = [
    PandasDataset.from_long_dataframe(test_window[test_columns],
                                      target="Value",
                                      item_id="Node",
                                      feat_dynamic_real=["Month_Of_Year"],
                                      static_features=train_static,
                                      timestamp='Date',
                                      freq='M')
    for test_window in test_lis
]

In [122]:
from gluonts.evaluation import make_evaluation_predictions

# The column order of this table (from the third column on) defines the order
# in which the series are written to the output file. It is read before the
# loop so the ordering is available for every window index; it used to be
# built only when i == 0, which left index_list undefined for any run that
# does not include window 0.
new_df = pd.read_csv(path+'/Tourism_process.csv')
node_list = new_df.columns[2:]

# Only the window with index 3 is processed; widen the range to run others.
for i in range(3,4):
    # Estimator
    estimator = DeepAREstimator(freq=freq,
                                prediction_length=prediction_length,
                                context_length=10*prediction_length,
                                num_layers=3,
                                hidden_size=41,
                                lr=1e-2,
                                weight_decay=1e-8,
                                dropout_rate=0.1,
                                num_feat_dynamic_real=1,
                                num_feat_static_real=3,
                                distr_output=NormalOutput(),
                                patience=10,
                                scaling=False,
                                num_parallel_samples=1000,
                                batch_size=16,
                                trainer_kwargs={'accelerator':'gpu','max_epochs':300})

    # Train
    predictor = estimator.train(train_standard_lis[i],cache_data=True)

    # Predict
    # NOTE(review): make_evaluation_predictions withholds the trailing
    # prediction_length points of every series and forecasts them from what is
    # left. prediction_length and stride are both 12, so if each test window
    # holds only `stride` points per series the predictor is given no history,
    # and the test values are on the raw scale while training used
    # standardized values -- confirm this is the intended evaluation set-up.
    forecast_it, _ = make_evaluation_predictions(dataset=test_ds_lis[i], predictor=predictor,num_samples=1000)
    forecasts = list(forecast_it)

    # Anti standardized: map the samples back to the original scale, in place,
    # using the per-node mean/std saved for this window.
    for forecast in forecasts:
        node_scale = sp[i][forecast.item_id]
        forecast.samples = (forecast.samples)*node_scale['std']+node_scale['mean']

    # Position of every node of node_list inside `forecasts`.
    forecast_ids = [forecast.item_id for forecast in forecasts]
    index_list = [forecast_ids.index(node) for node in node_list]

    # Save the distri params: for each forecast step, the sample mean and the
    # sample variance (ddof=1) of every series, ordered like node_list.
    distr_params_new = []
    for step in range(prediction_length):
        step_means = [float(np.mean(forecasts[k].samples[:,step])) for k in index_list]
        step_vars = [float(np.var(forecasts[k].samples[:,step],ddof=1)) for k in index_list]
        distr_params_new.append([step_means,step_vars])

    with open('Tourism_deepar_e300_optuna_in_{}.json'.format(i+1),'w') as file:
        json.dump(distr_params_new,file)

INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
/opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name  | Type        | Params | In sizes                                                      | Out sizes    
----------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 38.6 K | [[1, 1], [1, 3], [1, 156, 3], [1, 156], [1, 156], [1, 12, 3]] | [1, 1000, 12]
----------------------------------------------------------------------------------------------------------------------
38.6 K    Trainable params
0         Non-trainable params
38.6 K    Total params
0.154     Total estimated model params size (M

Training: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 50: 'train_loss' reached 0.95634 (best 0.95634), saving model to '/kaggle/working/lightning_logs/version_34/checkpoints/epoch=0-step=50.ckpt' as top 1
INFO: Epoch 1, global step 100: 'train_loss' reached 0.80588 (best 0.80588), saving model to '/kaggle/working/lightning_logs/version_34/checkpoints/epoch=1-step=100.ckpt' as top 1
INFO: Epoch 2, global step 150: 'train_loss' reached 0.79586 (best 0.79586), saving model to '/kaggle/working/lightning_logs/version_34/checkpoints/epoch=2-step=150.ckpt' as top 1
INFO: Epoch 3, global step 200: 'train_loss' was not in top 1
INFO: Epoch 4, global step 250: 'train_loss' reached 0.75610 (best 0.75610), saving model to '/kaggle/working/lightning_logs/version_34/checkpoints/epoch=4-step=250.ckpt' as top 1
INFO: Epoch 5, global step 300: 'train_loss' was not in top 1
INFO: Epoch 6, global step 350: 'train_loss' reached 0.74473 (best 0.74473), saving model to '/kaggle/working/lightning_logs/version_34/checkpoints/epoch=6-st

In [125]:
ls /kaggle/working/lightning_logs/version_34/checkpoints

'epoch=234-step=11750.ckpt'


In [87]:
# Print the saved distribution parameters file with suffix 1.
# NOTE(review): this cell's execution count (87) is lower than that of the
# cells before it, and the training loop as written (range(3,4)) only writes
# the file with suffix 4 -- this output presumably comes from an earlier run;
# confirm before relying on it.
!cat Tourism_deepar_e300_optuna_in_1.json

[[[39847.734375, 14139.826171875, 8765.724609375, 10091.1318359375, 2853.386474609375, 3580.895263671875, 1424.3382568359375, 1084.356689453125, 3818.81884765625, 4502.72509765625, 2694.3583984375, 1521.8067626953125, 1717.99560546875, 759.3115234375, 3863.904052734375, 1185.2330322265625, 1487.930419921875, 1245.095458984375, 1130.4293212890625, 5641.70263671875, 1581.614013671875, 1955.6597900390625, 851.1113891601562, 1040.380615234375, 553.0516357421875, 471.6122741699219, 414.3499450683594, 2305.26416015625, 499.8637390136719, 334.80780029296875, 487.5422058105469, 362.82745361328125, 209.64340209960938, 523.8480834960938, 216.84962463378906, 3088.246826171875, 811.6580810546875, 1281.9310302734375, 3184.276611328125, 2495.029052734375, 564.0869750976562, 331.8970031738281, 495.3888854980469, 350.8620910644531, 669.4297485351562, 545.6260986328125, 276.2684020996094, 220.1829071044922, 667.1642456054688, 2078.16455078125, 722.2625732421875, 447.09490966796875, 867.1732788085938, 3