In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet, Lasso
from statsmodels.tsa.ar_model import AutoReg
from sklearn.cross_decomposition import PLSRegression

from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse

from scipy.stats import norm
from statsmodels.tsa.stattools import acovf

import warnings
from tqdm import tqdm
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

In [2]:
data = pd.read_csv('data/df_processed.csv')
data = data.rename(columns = {'inflaton_rate' : 'inflation_rate'})
data = data.drop(columns = ['interest_rate'])                      # Remove interest rate 
#data = data.drop(columns = ['MOEX'])                      # Remove MOEX 
data

Unnamed: 0,month-year,inflation_rate,CPI,unemplyment_rate,GDP,fx_eur_rub,fx_usd_rub,oil_price,gas_price,gold_price,exports,imports,external_debt,chicken_eggs_10pcs,granulated_sugar_1kg,wheat_flour_1kg,avg_monthly_salary,sanctions,topic_0,topic_1,topic_10,topic_11,topic_12,topic_13,topic_14,topic_15,topic_16,topic_17,topic_18,topic_19,topic_2,topic_20,topic_21,topic_22,topic_23,topic_24,topic_25,topic_26,topic_27,topic_28,topic_29,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,inflaton_rate_lag_1,interest_rate_lag_1,CPI_lag_1,unemplyment_rate_lag_1,GDP_lag_1,fx_eur_rub_lag_1,fx_usd_rub_lag_1,oil_price_lag_1,gas_price_lag_1,gold_price_lag_1,exports_lag_1,imports_lag_1,external_debt_lag_1,chicken_eggs_10pcs_lag_1,granulated_sugar_1kg_lag_1,wheat_flour_1kg_lag_1,avg_monthly_salary_lag_1,sanctions_lag_1,topic_0_lag_1,topic_1_lag_1,topic_10_lag_1,topic_11_lag_1,topic_12_lag_1,topic_13_lag_1,topic_14_lag_1,topic_15_lag_1,topic_16_lag_1,topic_17_lag_1,topic_18_lag_1,topic_19_lag_1,topic_2_lag_1,topic_20_lag_1,topic_21_lag_1,topic_22_lag_1,topic_23_lag_1,topic_24_lag_1,topic_25_lag_1,topic_26_lag_1,topic_27_lag_1,topic_28_lag_1,topic_29_lag_1,topic_3_lag_1,topic_4_lag_1,topic_5_lag_1,topic_6_lag_1,topic_7_lag_1,topic_8_lag_1,topic_9_lag_1
0,2011-06,-0.17,0.2,-0.80,1410.022767,0.294486,0.072509,-5.068030,0.179903,16.981168,-183.4091,-612.7362,29759.00,-4.40,-0.99,-0.23,1358.0,0.0,0.010965,0.020755,0.042945,-0.024100,-0.000870,0.003960,-0.005177,-0.008439,0.000393,0.003873,-0.010572,0.003308,0.001046,-0.025016,-0.036764,0.007963,-0.000652,0.004178,0.020450,-0.004525,0.013010,0.013621,-0.009179,-0.011608,0.003003,0.045036,0.000611,0.004090,0.005048,0.007310,-0.02,3.0,0.6,0.20,0.000000,-0.492550,-0.211921,-8.681834,0.064493,29.149286,-1509.6119,999.5400,0.00,-6.87,-1.62,-0.19,260.0,0.0,-0.015291,-0.008705,0.027149,0.046140,-0.005383,-0.002118,-0.005183,-0.041349,0.019905,0.002262,-0.028721,0.017041,0.008994,0.000089,0.009740,0.002262,0.004725,-0.010566,0.040724,0.000401,-0.009106,-0.012082,0.008994,0.020315,-0.003121,-0.004693,-0.013285,0.006787,0.005127,-0.015692
1,2011-07,-0.41,0.0,-0.30,0.000000,-0.367301,-0.082742,1.051863,-0.162795,49.129550,-2030.2035,-298.6864,-29759.00,0.34,0.65,-0.07,-539.0,0.0,-0.003413,-0.010686,0.042254,-0.009634,0.017642,0.015669,-0.001440,0.038308,0.001051,-0.003788,0.032633,-0.011593,-0.011593,-0.003716,0.018016,-0.007878,0.003255,0.008324,0.014085,0.007042,-0.017195,-0.025836,0.003327,-0.008828,-0.004925,-0.014488,-0.001599,0.007042,-0.011665,0.007114,-0.17,0.0,0.2,-0.80,1410.022767,0.294486,0.072509,-5.068030,0.179903,16.981168,-183.4091,-612.7362,29759.00,-4.40,-0.99,-0.23,1358.0,0.0,0.010965,0.020755,0.042945,-0.024100,-0.000870,0.003960,-0.005177,-0.008439,0.000393,0.003873,-0.010572,0.003308,0.001046,-0.025016,-0.036764,0.007963,-0.000652,0.004178,0.020450,-0.004525,0.013010,0.013621,-0.009179,-0.011608,0.003003,0.045036,0.000611,0.004090,0.005048,0.007310
2,2011-08,-0.85,-0.3,0.40,0.000000,1.164849,0.860332,-10.999630,-0.369685,181.366300,2634.6997,2707.3087,0.00,2.97,-0.64,-0.12,-547.0,0.0,0.004574,-0.016487,0.025701,0.111831,-0.002457,-0.011880,0.004651,-0.049416,0.002150,0.004662,-0.009565,0.004574,0.006911,0.034959,-0.018900,-0.000011,-0.007053,0.004519,0.023364,-0.004706,0.006965,-0.014194,-0.018812,-0.052268,-0.007053,0.013580,-0.002501,0.021028,0.011660,-0.002446,-0.41,0.0,0.0,-0.30,0.000000,-0.367301,-0.082742,1.051863,-0.162795,49.129550,-2030.2035,-298.6864,-29759.00,0.34,0.65,-0.07,-539.0,0.0,-0.003413,-0.010686,0.042254,-0.009634,0.017642,0.015669,-0.001440,0.038308,0.001051,-0.003788,0.032633,-0.011593,-0.011593,-0.003716,0.018016,-0.007878,0.003255,0.008324,0.014085,0.007042,-0.017195,-0.025836,0.003327,-0.008828,-0.004925,-0.014488,-0.001599,0.007042,-0.011665,0.007114
3,2011-09,-0.95,0.0,-0.40,1310.761261,1.195111,1.861869,-0.730871,-0.134518,3.694409,-781.6325,-2594.2622,-12542.00,3.12,-3.83,-0.41,417.0,0.0,-0.009828,-0.002612,0.058957,-0.073331,-0.009759,-0.000895,-0.000275,0.015115,0.048647,-0.007009,-0.010172,-0.000758,-0.014432,0.002951,0.019651,0.002199,-0.000069,-0.010172,0.013605,-0.002336,-0.007285,0.015184,0.022469,0.000609,0.022607,-0.009971,-0.003232,0.006803,-0.000482,0.003915,-0.85,0.0,-0.3,0.40,0.000000,1.164849,0.860332,-10.999630,-0.369685,181.366300,2634.6997,2707.3087,0.00,2.97,-0.64,-0.12,-547.0,0.0,0.004574,-0.016487,0.025701,0.111831,-0.002457,-0.011880,0.004651,-0.049416,0.002150,0.004662,-0.009565,0.004574,0.006911,0.034959,-0.018900,-0.000011,-0.007053,0.004519,0.023364,-0.004706,0.006965,-0.014194,-0.018812,-0.052268,-0.007053,0.013580,-0.002501,0.021028,0.011660,-0.002446
4,2011-10,-0.02,0.5,-0.10,0.000000,0.581012,0.677647,0.818096,-0.225095,-92.271438,2189.9534,1760.0411,12542.00,1.00,-2.18,-0.38,134.0,0.0,0.006145,0.020288,0.058716,0.012690,0.001073,-0.005625,-0.001731,-0.007789,-0.052171,0.000000,0.013156,-0.008430,-0.004431,-0.007148,0.009694,-0.002700,-0.002268,0.042514,0.022018,0.000000,0.003774,0.001818,-0.012965,0.003828,-0.012099,-0.009432,0.005384,0.012844,-0.001194,-0.006595,-0.95,0.0,0.0,-0.40,1310.761261,1.195111,1.861869,-0.730871,-0.134518,3.694409,-781.6325,-2594.2622,-12542.00,3.12,-3.83,-0.41,417.0,0.0,-0.009828,-0.002612,0.058957,-0.073331,-0.009759,-0.000895,-0.000275,0.015115,0.048647,-0.007009,-0.010172,-0.000758,-0.014432,0.002951,0.019651,0.002199,-0.000069,-0.010172,0.013605,-0.002336,-0.007285,0.015184,0.022469,0.000609,0.022607,-0.009971,-0.003232,0.006803,-0.000482,0.003915
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,2023-08,0.83,0.7,-0.15,0.000000,3.777403,4.914347,5.286370,0.008580,-33.208478,6013.2000,-17.7000,0.00,0.84,0.91,-0.07,-1980.0,0.0,-0.002828,-0.019623,0.044867,-0.003175,0.015166,-0.005500,-0.000490,-0.002840,-0.005311,0.018332,0.000034,-0.005938,-0.002381,0.039972,-0.001207,-0.013781,-0.029810,0.005683,0.023574,0.001465,0.033674,-0.001844,-0.005345,-0.000950,0.025936,0.001589,-0.013328,0.013688,0.000158,-0.011829,1.06,1.0,1.5,0.00,0.000000,9.836955,7.336636,5.760214,0.161931,9.112618,-3471.8000,195.3000,17972.60,1.18,3.22,-0.22,-5185.0,-1.0,0.002873,0.003397,0.066122,0.007434,0.003565,0.005372,0.003664,0.000929,-0.010114,0.003072,-0.002249,0.004156,-0.013304,0.004942,-0.016781,0.037427,-0.074341,0.013460,0.022041,-0.000791,0.007696,-0.027724,0.000175,0.005839,0.012719,-0.006911,0.011049,0.009796,0.009316,0.003185
147,2023-09,0.87,2.1,-0.05,5017.504956,-0.562898,0.941082,8.109630,0.050520,-5.146521,-1572.6500,-988.7000,-16946.17,6.35,-0.61,0.15,1483.0,0.0,0.014323,0.034598,0.039339,-0.001231,-0.004191,-0.006028,0.006508,0.002545,0.010021,-0.013603,0.005876,0.000105,0.003097,-0.041372,-0.002963,-0.028859,0.040290,-0.003167,0.022030,0.000079,-0.039488,0.004016,0.003358,-0.003486,0.032293,-0.003144,-0.001465,0.011015,-0.009149,0.010784,0.83,3.5,0.7,-0.15,0.000000,3.777403,4.914347,5.286370,0.008580,-33.208478,6013.2000,-17.7000,0.00,0.84,0.91,-0.07,-1980.0,0.0,-0.002828,-0.019623,0.044867,-0.003175,0.015166,-0.005500,-0.000490,-0.002840,-0.005311,0.018332,0.000034,-0.005938,-0.002381,0.039972,-0.001207,-0.013781,-0.029810,0.005683,0.023574,0.001465,0.033674,-0.001844,-0.005345,-0.000950,0.025936,0.001589,-0.013328,0.013688,0.000158,-0.011829
148,2023-10,0.68,2.0,0.00,0.000000,-0.978783,0.846426,-3.963682,0.453532,3.158179,-1572.6500,-988.7000,16946.17,11.45,-0.35,0.15,2907.5,1.0,-0.018170,-0.017280,0.035294,0.002270,0.004824,0.003341,0.003250,-0.006187,0.011065,-0.005137,-0.001396,0.001895,-0.008424,0.020823,0.013606,0.021392,-0.036698,0.001158,0.022689,0.008564,-0.000861,0.004503,0.003164,0.000643,-0.035108,0.003860,0.004181,0.023529,0.006542,0.005055,0.87,1.0,2.1,-0.05,5017.504956,-0.562898,0.941082,8.109630,0.050520,-5.146521,-1572.6500,-988.7000,-16946.17,6.35,-0.61,0.15,1483.0,0.0,0.014323,0.034598,0.039339,-0.001231,-0.004191,-0.006028,0.006508,0.002545,0.010021,-0.013603,0.005876,0.000105,0.003097,-0.041372,-0.002963,-0.028859,0.040290,-0.003167,0.022030,0.000079,-0.039488,0.004016,0.003358,-0.003486,0.032293,-0.003144,-0.001465,0.011015,-0.009149,0.010784
149,2023-11,0.79,2.7,-0.05,0.000000,-4.575013,-6.690672,-8.090151,-0.099182,69.803249,554.0000,1213.0000,0.00,14.74,-2.12,0.58,-446.5,-1.0,0.003609,0.001137,0.045230,0.003056,-0.011517,0.008453,0.002463,0.005721,-0.015611,0.004840,0.006256,0.000715,0.007882,-0.042003,-0.011032,0.005798,0.062995,-0.001739,0.024671,-0.007635,0.020056,-0.007298,-0.005204,-0.001038,-0.027735,-0.004615,0.001856,0.009868,-0.003900,0.006233,0.68,2.0,2.0,0.00,0.000000,-0.978783,0.846426,-3.963682,0.453532,3.158179,-1572.6500,-988.7000,16946.17,11.45,-0.35,0.15,2907.5,1.0,-0.018170,-0.017280,0.035294,0.002270,0.004824,0.003341,0.003250,-0.006187,0.011065,-0.005137,-0.001396,0.001895,-0.008424,0.020823,0.013606,0.021392,-0.036698,0.001158,0.022689,0.008564,-0.000861,0.004503,0.003164,0.000643,-0.035108,0.003860,0.004181,0.023529,0.006542,0.005055


## HyperParams grid

In [3]:
random_forest_param_grid = {
#     'n_estimators' : [10, 30, 50, 100],
    'max_depth' : [None, 1, 10, 20],
    'random_state' : [42]
}

lasso_param_grid = {
    'alpha': [0.01, 0.1, 1, 10, 100]
    }

pls_param_grid = {
    'n_components': [5, 10, 20, 30, 40, 50, 60]
    }

elasticnet_param_grid = {
    'alpha': [0.01, 0.1, 1, 10, 100],
    'l1_ratio': [0.2, 0.5, 0.8]
}

horizon = 12
split_point = int(len(data) * 0.85)
print(split_point)

128


In [4]:
def test_a_model(data, split_point, horizon, model, param_grid):
    frame = data.copy()

    features = list(frame.drop(columns = ['month-year', 'inflation_rate']).columns)

    # Train and Test sets snapshot
    train_part = frame.iloc[ : split_point]
    test_part = frame.iloc[split_point : ]

    X_train = frame.iloc[ : split_point][features]
    y_train = frame.iloc[ : split_point]['inflation_rate'].values
    y_test = frame.iloc[split_point : split_point + horizon + 1]['inflation_rate'].to_list()

    cv = GridSearchCV(model, param_grid, scoring='neg_mean_squared_error', cv=5)
    cv.fit(X_train, y_train)
    best_model = cv.best_estimator_
    
    predictions = []

    # horizon (12 periods) predictions
    for h in range(horizon):
        test_step = frame.iloc[split_point+h:split_point+h+1][features] # takes 1 observation from test to predict
        
        if h > 0:
            test_step['inflaton_rate_lag_1'] = prediction # replaces inflaton_rate_lag_1 with last_prediction  
        
        prediction = best_model.predict(test_step)[0]     # makes 1 prediction
        predictions.append(prediction)                    # saves prediction
        
    return {
        'Best Model': best_model,
        'True Values': y_test,
        'Predictions': predictions,
        'Train Size' : len(X_train)
        }


In [5]:
def recursive_loop(data):
    all_results = {
        'RF':[],
        'LASSO':[],
        'ENET': [],
        'PLS' : [],
        'AR4':[]
        }

    for i in tqdm(range(split_point, len(data)-horizon)):

        # RandomForest
        rf_results = test_a_model(
            data        = data,
            split_point = i,
            horizon     = horizon,
            model       = RandomForestRegressor(),
            param_grid  = random_forest_param_grid,
        )
        all_results['RF'].append((i, rf_results))

        # LASSO
        lasso_results = test_a_model(
            data        = data,
            split_point = split_point,
            horizon     = horizon,
            model       = Lasso(),
            param_grid  = lasso_param_grid,
            )
        all_results['LASSO'].append((i, lasso_results))

        # Elastic-NET
        elastic_net_results = test_a_model(
            data        = data,
            split_point = split_point,
            horizon     = horizon,
            model       = ElasticNet(),
            param_grid  = elasticnet_param_grid,
            )
        all_results['ENET'].append((i, elastic_net_results))
        
        # PLSRegression
        pls_results = test_a_model(
            data        = data,
            split_point = split_point,
            horizon     = horizon,
            model       = PLSRegression(),
            param_grid  = pls_param_grid,
            )
        all_results['PLS'].append((i, pls_results))
        
        
        # AR4
        y_train = data.iloc[ : i]['inflation_rate'].values
        ar4_model = AutoReg(y_train, lags=4).fit()
        predictions = ar4_model.predict(start=len(y_train), end=len(y_train)+horizon)
        all_results['AR4'].append((i, {'Predictions': predictions, 'Train Size': len(y_train)}))
    return all_results

In [6]:
def newey_west_adjusted_dm_stat(loss_diff, horizon):
    """
    Computes the Newey-West adjusted Diebold-Mariano statistic.
    """
    T = len(loss_diff)
    mean_loss_diff = np.mean(loss_diff)
    autocovariances = acovf(loss_diff, fft=True)[:horizon]
    nw_variance = autocovariances[0] + 2 * np.sum((1 - np.arange(1, horizon) / horizon) * autocovariances[1:horizon])
    dm_stat = mean_loss_diff / np.sqrt(nw_variance / T)
    return dm_stat

def get_stats(all_results):
    train_sizes = []
    models = []
    horizons = []
    rmses = []
    rRMSEs = []
    dm_results = []
    significances = []
    for k in range(len(all_results['RF'])-1):
        for model in ['RF', 'LASSO', 'ENET', 'PLS','AR4']:
            for h in [3, 6, 9, 12]:

                if model == 'AR4':
                    rmse = np.sqrt(mse(all_results['RF'][k][1]['True Values'][:h], all_results[model][k][1]['Predictions'][:h]))
                    rRMSE = rmse / np.sqrt(mse(all_results['RF'][k][1]['True Values'][:h], all_results[model][k][1]['Predictions'][:h]))
                    train_sizes.append(all_results[model][k][0])
                    models.append(model)
                    horizons.append(h)
                    rmses.append(rmse)
                    rRMSEs.append(rRMSE)
                    dm_results.append(None)
                    significances.append(None)

                else:
                    rmse = np.sqrt(mse(all_results[model][k][1]['True Values'][:h], all_results[model][k][1]['Predictions'][:h]))
                    rRMSE = rmse / np.sqrt(mse(all_results[model][k][1]['True Values'][:h], all_results['AR4'][k][1]['Predictions'][:h]))

                    error_diff = np.array(all_results[model][k][1]['True Values'][:h]) - np.array(all_results[model][k][1]['Predictions'][:h])
                    error_ar4 = np.array(all_results[model][k][1]['True Values'][:h]) - np.array(all_results['AR4'][k][1]['Predictions'][:h])
                    loss_diff = error_diff**2 - error_ar4**2
                    dm_stat = newey_west_adjusted_dm_stat(loss_diff, h) 

                    if dm_stat > 0:
                        pval = 1 - norm.cdf(dm_stat)  # One-sided test
                    else:
                        pval = norm.cdf(dm_stat)

                    dm_result = np.round(dm_stat, 5) 
                    significance = "*" if pval < 0.05 else "" 
                    significance = "**" if pval < 0.01 else significance 
                    significance = "***" if pval < 0.001 else significance

                    train_sizes.append(all_results[model][k][0])
                    models.append(model)
                    horizons.append(h)
                    rmses.append(rmse)
                    rRMSEs.append(rRMSE)
                    dm_results.append(dm_result)
                    significances.append(significance)


    results = pd.DataFrame(data={
        'model': models,
        'Train Size': train_sizes,
        'horizon (months)': horizons,
        'RMSE': rmses,
        'rRMSE': rRMSEs,
        'DM-test' : dm_results,
        'Significance' : significances})

    return results   

# Run models on different set of features

In [7]:
me_results = recursive_loop(data[[ _ for _ in data.columns if 'topic_' not in _]])

topics_results = recursive_loop(data[['month-year', 'inflation_rate', 'inflaton_rate_lag_1']+[ _ for _ in data.columns if 'topic_' in _]])

me_and_topics_results = recursive_loop(data)

100%|███████████████████████████████████████████████████████████████████████████| 11/11 [00:36<00:00,  3.34s/it]
100%|███████████████████████████████████████████████████████████████████████████| 11/11 [00:59<00:00,  5.39s/it]
100%|███████████████████████████████████████████████████████████████████████████| 11/11 [01:19<00:00,  7.23s/it]


In [8]:
me_stats = get_stats(me_results)
topics_stats = get_stats(topics_results)
me_and_topics = get_stats(me_and_topics_results)

In [9]:
me_stats[me_stats['Train Size']==130]

Unnamed: 0,model,Train Size,horizon (months),RMSE,rRMSE,DM-test,Significance
40,RF,130,3,1.035615,0.202128,-5.35015,***
41,RF,130,6,0.805184,0.208714,-2.526,**
42,RF,130,9,0.693036,0.211422,-2.19924,*
43,RF,130,12,2.235146,0.611401,-2.20334,*
44,LASSO,130,3,1.016711,0.24238,-1.90759,*
45,LASSO,130,6,0.762822,0.231059,-2.15252,*
46,LASSO,130,9,0.691837,0.241263,-2.22727,*
47,LASSO,130,12,0.603997,0.240247,-2.01212,*
48,ENET,130,3,1.319028,0.314451,-1.81775,*
49,ENET,130,6,0.962264,0.29147,-2.13491,*


In [10]:
topics_stats[topics_stats['Train Size']==130]

Unnamed: 0,model,Train Size,horizon (months),RMSE,rRMSE,DM-test,Significance
40,RF,130,3,1.441951,0.281436,-4.46961,***
41,RF,130,6,1.260497,0.326737,-2.29842,*
42,RF,130,9,1.186392,0.361927,-1.98116,*
43,RF,130,12,2.552348,0.698169,-1.66555,*
44,LASSO,130,3,4.298634,1.024778,0.07116,
45,LASSO,130,6,3.119556,0.944915,-0.27258,
46,LASSO,130,9,2.594473,0.904764,-0.63668,
47,LASSO,130,12,2.255699,0.897232,-0.81001,
48,ENET,130,3,4.297555,1.02452,0.07042,
49,ENET,130,6,3.119026,0.944754,-0.2734,


In [11]:
me_and_topics[me_and_topics['Train Size']==130]

Unnamed: 0,model,Train Size,horizon (months),RMSE,rRMSE,DM-test,Significance
40,RF,130,3,0.984294,0.192112,-5.17184,***
41,RF,130,6,0.789641,0.204685,-2.4963,**
42,RF,130,9,0.684189,0.208723,-2.18261,*
43,RF,130,12,2.258246,0.61772,-2.13147,*
44,LASSO,130,3,1.016711,0.24238,-1.90759,*
45,LASSO,130,6,0.762822,0.231059,-2.15252,*
46,LASSO,130,9,0.691837,0.241263,-2.22727,*
47,LASSO,130,12,0.603997,0.240247,-2.01212,*
48,ENET,130,3,1.319028,0.314451,-1.81775,*
49,ENET,130,6,0.962264,0.29147,-2.13491,*


# Joint report

In [12]:
def make_report(train_size):
    m = me_stats[me_stats['Train Size']==train_size][['model', 'horizon (months)', 'rRMSE']]
    n = topics_stats[topics_stats['Train Size']==train_size][['model', 'horizon (months)', 'rRMSE']]
    m_n = me_and_topics[me_and_topics['Train Size']==train_size][['model', 'horizon (months)', 'rRMSE']]
    
    m = m.rename(columns = {'rRMSE' : 'rRMSE (M)'})
    n = n.rename(columns = {'rRMSE' : 'rRMSE (N)'})
    m_n = m_n.rename(columns = {'rRMSE' : 'rRMSE (M+N)'})
    
    report = m.merge(n, how = 'left', on = ['model', 'horizon (months)'])
    report = report.merge(m_n, how = 'left', on = ['model', 'horizon (months)'])
    report = report[report['model'] != 'AR4']
    
    report['% Improvement'] = np.round(100 * (report['rRMSE (M+N)'] - report['rRMSE (M)']) / report['rRMSE (M)'], 2)
    return report


In [13]:
report = make_report(130)
report

Unnamed: 0,model,horizon (months),rRMSE (M),rRMSE (N),rRMSE (M+N),% Improvement
0,RF,3,0.202128,0.281436,0.192112,-4.96
1,RF,6,0.208714,0.326737,0.204685,-1.93
2,RF,9,0.211422,0.361927,0.208723,-1.28
3,RF,12,0.611401,0.698169,0.61772,1.03
4,LASSO,3,0.24238,1.024778,0.24238,0.0
5,LASSO,6,0.231059,0.944915,0.231059,0.0
6,LASSO,9,0.241263,0.904764,0.241263,0.0
7,LASSO,12,0.240247,0.897232,0.240247,0.0
8,ENET,3,0.314451,1.02452,0.314451,0.0
9,ENET,6,0.29147,0.944754,0.29147,0.0
