# Ensemble

In [1]:
import os
os.chdir("../../")

import pandas as pd
import numpy as np
from statsmodels.tsa.vector_ar.vecm import VECM
from statsmodels.tsa.vector_ar.vecm import select_order
from scripts.python.tsa.mtsmodel import *
from scripts.python.tsa.ts_eval import *


import seaborn as sns
sns.set_style("whitegrid")
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [2]:
for country in ["palau", "samoa", "tonga", "solomon_islands", "vanuatu"]:

    folderpath = os.getcwd() + "/data/tourism/" + str(country) + "/model/"
    mappings = [("sarimax", "train_pred"),
                ("var", "pred_total"), ("lf", "pred_mean")]

    country_pred = pd.DataFrame()
    for mapping in mappings:
        model, column = mapping
        filepath = folderpath + str(model) + "_" + str(country) + ".csv"
        pred_df = pd.read_csv(filepath).drop("Unnamed: 0", axis=1)
        pred_df["date"] = pd.to_datetime(pred_df["date"])

        model_col = (pred_df[["date", "total", column]]
                     .rename({column: model}, axis=1))

        if country_pred.empty:
            country_pred = model_col
        else:
            country_pred = country_pred.merge(model_col)
    
    # Mean Ensemble
    country_pred["mean_ensemble"] = (
        country_pred[["sarimax", "var", "lf"]].mean(axis=1))
    
    # Stack
    import statsmodels.formula.api as smf
    res = smf.ols("total~sarimax+var+lf", data=country_pred).fit_regularized()
    country_pred["stack_ols"] = res.fittedvalues
    
    evals = pd.DataFrame()
    for col in ["sarimax", "var", "lf", "mean_ensemble", "stack_ols"]:
        mod_eval = pd.DataFrame(calculate_evaluation(country_pred["total"], country_pred[col]),
                                index=[col])
        evals = pd.concat([evals, mod_eval], axis=0)

    evals.columns.name = str(country)
    display(evals)

palau,MSE,RMSE,MAE,SMAPE
sarimax,1586349.0,1259.503308,701.765452,53.857541
var,1127006.0,1061.605449,554.935555,38.243616
lf,494503.4,703.209346,389.892735,40.776367
mean_ensemble,532520.0,729.739653,412.870015,33.837139
stack_ols,470422.1,685.873218,388.073317,45.216361


samoa,MSE,RMSE,MAE,SMAPE
sarimax,8076302.0,2841.883483,1410.434675,141.993878
var,10290890.0,3207.941282,1757.557409,141.614714
lf,2107651.0,1451.775046,763.990597,131.108537
mean_ensemble,3794369.0,1947.913971,1093.282593,135.822954
stack_ols,2003690.0,1415.517615,835.530337,131.80255


tonga,MSE,RMSE,MAE,SMAPE
sarimax,776703.3,881.307702,381.831288,80.236281
var,1226895.0,1107.653092,472.092955,54.359031
lf,678451.1,823.68143,319.198937,102.021722
mean_ensemble,329857.1,574.331844,223.2797,82.645615
stack_ols,193816.1,440.245499,183.024479,60.651486


solomon_islands,MSE,RMSE,MAE,SMAPE
sarimax,47538.016148,218.032145,149.903809,32.72611
var,65021.669854,254.99347,154.338247,17.694166
lf,48753.426027,220.80178,150.232442,26.614403
mean_ensemble,33204.357994,182.22063,125.903946,23.072342
stack_ols,38922.471964,197.28779,139.112394,44.268386


vanuatu,MSE,RMSE,MAE,SMAPE
sarimax,669251.0,818.077609,372.363525,133.369731
var,1600258.0,1265.013157,554.204122,134.493067
lf,668646.0,817.707761,520.15692,128.001919
mean_ensemble,495178.4,703.689161,350.788402,131.534514
stack_ols,440116.1,663.41249,355.018492,128.216336
