In [56]:
import json
from itertools import islice
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)

from pts.dataset import recipe as rcp


# Length passed to write_to_file (and on to rcp.generate) for every dataset.
TIME_SERIE_LENGTH = 10
# Forecast horizon; used as prediction_length when building the MetaData.
PREDICTION_LENGTH = 5
# Upper bound on the number of generated entries written to each file.
NUMBER_OF_TIME_SERIES = 1000
        
def write_to_file(recipe, length, num_ts, file_name):
    """Generate entries from a recipe and save them as JSON lines.

    Args:
        recipe: Recipe forwarded unchanged to ``rcp.generate``.
        length: Length forwarded unchanged to ``rcp.generate``
            (presumably the number of points per series -- confirm against
            the ``rcp`` API).
        num_ts: Maximum number of generated entries to write.
        file_name: Output path without extension; ``.json`` is appended.

    Every entry is written as one JSON object per line. The start timestamp
    passed to ``rcp.generate`` is fixed to "2019-01-07 00:00".
    """
    def to_jsonable(value):
        # isinstance (rather than an exact type match) so that ndarray
        # subclasses are converted as well. NumPy scalars such as np.int64
        # or np.float32 are not JSON serializable either, so they are
        # unwrapped to plain Python scalars.
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            return value.item()
        return value

    with open("{}.json".format(file_name), 'w') as f:
        for entry in islice(rcp.generate(length, recipe, "2019-01-07 00:00"), num_ts):
            record = {key: to_jsonable(entry[key]) for key in entry}
            f.write(json.dumps(record) + '\n')
        

        
        
# Target recipes for the synthetic datasets, keyed by the output file stem.
DATASET_REFERENCES = dict(gaussian=4 + rcp.RandomGaussian())

# Write one JSON-lines file per reference distribution.
for fn in DATASET_REFERENCES.keys():
    recipe = [
        ("input", 1.0),
        ("target", DATASET_REFERENCES[fn]),
    ]
    write_to_file(recipe, TIME_SERIE_LENGTH, NUMBER_OF_TIME_SERIES, fn)



In [None]:
from pts.dataset.artificial import RecipeDataset
from pts.dataset import FileDataset, MetaData

from pts.dataset.repository import get_dataset
from pts.evaluation import make_evaluation_predictions, Evaluator
from pts.model.deepar import DeepAREstimator
from pts.model.simple_feedforward import SimpleFeedForwardEstimator
from pts.modules import (
    ImplicitQuantileOutput,
    PiecewiseLinearOutput,
)
from pts import Trainer

# Frequency and forecast horizon shared by the dataset loader and the estimator.
metadata = MetaData(freq="H", prediction_length=PREDICTION_LENGTH)

# Training budget handed to the Trainer inside run_one_training.
NUM_EPOCHS = 10
NUM_BATCHES_PER_EPOCH = 120
# Default number of repeated trainings in run_several_training.
NUM_TRAININGS = 5
# Number of sample paths requested from make_evaluation_predictions.
NUM_SAMPLES = 100

def run_one_training(dataset, distr_output):
    """Fit one feed-forward estimator on ``dataset`` and evaluate it on the same data.

    Args:
        dataset: Dataset used both for training and for the evaluation
            predictions.
        distr_output: Distribution output handed to the estimator.

    Returns:
        A tuple ``(estimator, agg_metrics, sampled_quantiles)``.
        ``sampled_quantiles`` is indexed by ``q`` in ``0..99`` and holds, for
        level ``q / 100``, element 0 of each forecast's quantile averaged
        over all forecasts.
    """
    trainer = Trainer(
        device="cpu",
        epochs=NUM_EPOCHS,
        learning_rate=1e-3,
        num_batches_per_epoch=NUM_BATCHES_PER_EPOCH,
        batch_size=256,
        num_workers=1,
    )
    estimator = SimpleFeedForwardEstimator(
        distr_output=distr_output,
        freq=metadata.freq,
        prediction_length=metadata.prediction_length,
        num_hidden_dimensions=[10],
        trainer=trainer,
    )
    predictor = estimator.train(dataset)

    # Note: the evaluation runs on the very dataset the model was trained on.
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset,
        predictor=predictor,
        num_samples=NUM_SAMPLES,
    )
    # Forecasts are materialised before the series, in that order.
    forecasts = list(forecast_it)
    tss = list(ts_it)

    agg_metrics, item_metrics = Evaluator()(
        iter(tss), iter(forecasts), num_series=len(dataset)
    )

    # One row per forecast, one column per quantile level q / 100.
    quantile_rows = [
        [forecast.quantile(q / 100.)[0] for q in range(100)]
        for forecast in forecasts
    ]
    sampled_quantiles = pd.DataFrame(quantile_rows).mean(axis=0)
    return estimator, agg_metrics, sampled_quantiles


def run_several_training(dataset, distr_output, num_trainings=NUM_TRAININGS):
    """Call ``run_one_training`` repeatedly and gather the results per run.

    Args:
        dataset: Forwarded to every ``run_one_training`` call.
        distr_output: Forwarded to every ``run_one_training`` call.
        num_trainings: Number of independent runs to perform.

    Returns:
        Three parallel lists ``(estimators, all_agg_metrics,
        all_sampled_quantiles)``, each with one entry per run in run order.
    """
    runs = [run_one_training(dataset, distr_output) for _ in range(num_trainings)]

    estimators = [estimator for estimator, _, _ in runs]
    all_agg_metrics = [agg_metrics for _, agg_metrics, _ in runs]
    all_sampled_quantiles = [quantiles for _, _, quantiles in runs]
    return estimators, all_agg_metrics, all_sampled_quantiles

def plot_quantile_functions(all_sampled_quantiles):
    """Plot each quantile curve in ``all_sampled_quantiles`` with ``plt.plot``.

    Args:
        all_sampled_quantiles: Iterable of series, one per training run; each
            is passed as-is to a separate ``plt.plot`` call.
    """
    for quantile_curve in all_sampled_quantiles:
        plt.plot(quantile_curve)

def summarize_metrics(all_agg_metrics):
    """Display mean/min/max of the evaluation metrics across training runs.

    Args:
        all_agg_metrics: One record of metric name -> value per training run,
            in any form accepted by ``pd.DataFrame``. Must contain the
            columns "MASE", "sMAPE" and "MSIS".

    Two tables are shown through ``display`` (one row per metric, one column
    per aggregation): first the key metrics, then every column whose name
    contains "Coverage" or "wQuantile".
    """
    key_metrics = ["MASE", "sMAPE", "MSIS"]
    # Aggregations are named by string instead of passing NumPy callables:
    # recent pandas versions warn about np.mean/np.min/np.max in ``agg``, and
    # the resulting labels ("amin" vs "min") depended on the installed
    # NumPy/pandas versions. Strings give stable "mean"/"min"/"max" labels.
    agg_func = ["mean", "min", "max"]

    all_metrics = pd.DataFrame(all_agg_metrics)
    quantile_metrics = [
        c for c in all_metrics.columns if ("Coverage" in c) or ("wQuantile" in c)
    ]

    display(all_metrics[key_metrics].agg(agg_func, axis=0).T)
    display(all_metrics[quantile_metrics].agg(agg_func, axis=0).T)
    

# Results of every (dataset name, distribution-output name) combination.
store_results = dict()

# Distribution heads to compare, keyed by a short display name.
distr_outputs = dict(
    iqn=ImplicitQuantileOutput(output_domain="Real"),
    piecewiseLinear=PiecewiseLinearOutput(num_pieces=15),
)

# Train every distribution output on every generated dataset.
for fn in DATASET_REFERENCES.keys():
    dataset = FileDataset("{}.json".format(fn), metadata.freq, shuffle=True)
    for dn, distr_output in distr_outputs.items():
        estimators, all_agg_metrics, all_sampled_quantiles = run_several_training(
            dataset, distr_output
        )
        store_results[fn, dn] = dict(
            estimators=estimators,
            all_agg_metrics=all_agg_metrics,
            all_sampled_quantiles=all_sampled_quantiles,
        )
        





117it [00:02, 40.59it/s, avg_epoch_loss=0.814, epoch=0]

In [None]:
# Compare the learned quantile functions with the true one, then summarise
# the evaluation metrics, for every dataset / distribution-output pair.
for fn in DATASET_REFERENCES:
    true_distribution = DATASET_REFERENCES[fn]
    # Estimate the reference quantile function empirically from a large draw
    # of the true recipe. (The original referenced the undefined names
    # `test` and `output` here, which fails on a fresh kernel.)
    random_values = rcp.evaluate([('target', true_distribution)], length=10000)['target']
    true_quantiles = [np.quantile(random_values, q=q/100) for q in range(100)]

    for dn in distr_outputs:
        all_sampled_quantiles = store_results[(fn, dn)]["all_sampled_quantiles"]
        # The reference curve is drawn first so the single legend entry
        # below labels it.
        plt.plot(true_quantiles, color="black", linestyle='dashed',)
        plot_quantile_functions(all_sampled_quantiles)
        plt.legend(["True distribution"])
        plt.xlabel("Quantile level (%)")
        plt.ylabel("Quantile value")
        plt.title("Distribution: {}, Estimator: {}".format(fn, dn))
        plt.show()

    for dn in distr_outputs:
        all_agg_metrics = store_results[(fn, dn)]["all_agg_metrics"]
        summarize_metrics(all_agg_metrics)
    