In [1]:
#-*- coding:utf-8 -*-

import os
import time
import datetime
import random
import math
import pickle
import unicodedata

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
import scipy.stats as st
from sklearn.preprocessing import StandardScaler

from tqdm import tqdm
from tqdm.contrib.concurrent import process_map

In [2]:
# Qualitative 10-colour palette used for plotting.
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` returns the same colormap and exists in old and new releases.
cmap = plt.get_cmap('tab10')

In [3]:
# Row order used when displaying result tables: 'raw' first, then for each
# normalisation scheme its four period-specific variants followed by the
# plain (un-suffixed) scheme name.
order = ['raw'] + [
    label
    for scheme in ('min-max', 'z-score', 'nonlinear')
    for label in ['{}_p_{}'.format(scheme, p) for p in (10, 20, 30, 40)] + [scheme]
]
order

['raw',
 'min-max_p_10',
 'min-max_p_20',
 'min-max_p_30',
 'min-max_p_40',
 'min-max',
 'z-score_p_10',
 'z-score_p_20',
 'z-score_p_30',
 'z-score_p_40',
 'z-score',
 'nonlinear_p_10',
 'nonlinear_p_20',
 'nonlinear_p_30',
 'nonlinear_p_40',
 'nonlinear']

In [4]:
# Build one summary frame per (lag, var) setting and collect them in `out_all`.
# For every setting, the result CSV of each period directory
# ('results/out_10' ... 'results/out_40') is read with its first column as index.
out_all = []
for lag in [5, 10, 15]:
    for var in [1, 2, 3]:
        f_name = 'results_{}_{}.csv'.format(lag, var)
        out_list = []
        for period in [10, 20, 30, 40]:
            out = pd.read_csv(os.path.join('results/out_{}'.format(period), f_name), index_col=0)
            # Suffix the period-dependent row labels with the period so that
            # rows coming from different period files stay distinguishable.
            out = out.rename(index={'min-max_p': 'min-max_p_{}'.format(period),
                                    'z-score_p': 'z-score_p_{}'.format(period),
                                    'nonlinear_p': 'nonlinear_p_{}'.format(period)})
            out_list.append(out)
        # Row labels that occur in several period files (i.e. the ones not
        # renamed above) are collapsed into a single row holding their mean.
        # groupby sorts the labels, so the result is NOT in `order` sequence;
        # use `.loc[order]` when a specific row order is needed.
        out_df = pd.concat(out_list).groupby(level=0).mean()
        # lag/var are kept as ordinary columns; the later pivot tables select
        # them by name. (The former `out_df.set_index([...])` call here threw
        # its result away and has been dropped as dead code.)
        out_df['lag'] = lag
        out_df['var'] = var
        out_all.append(out_df)

In [5]:
# Preview of the frame left over from the last loop iteration (lag=15, var=3),
# with rows arranged in the canonical `order`.
out_df.loc[order, :]

Unnamed: 0,mu(mean),std(mean),MAE(mean),mu(std),std(std),MAE(std),lag,var
raw,14.95455,9.490621,2.02985,0.221986,1.935205,0.238714,15,3
min-max_p_10,14.6664,47.858756,5.6768,0.244039,3.652747,0.26745,15,3
min-max_p_20,12.7226,10.846654,3.3334,0.267946,1.996543,0.072543,15,3
min-max_p_30,11.8494,14.134498,3.921,0.235418,0.489095,0.117733,15,3
min-max_p_40,17.0676,14.294036,3.7836,0.05826,0.814836,0.162527,15,3
min-max,14.9398,9.405767,2.0196,0.210588,1.991697,0.24026,15,3
z-score_p_10,13.0356,41.04136,5.3132,0.194352,2.214085,0.177093,15,3
z-score_p_20,12.7084,27.357176,4.4728,0.300322,0.816626,0.13256,15,3
z-score_p_30,13.9586,21.79505,3.859,0.347327,3.222836,0.281078,15,3
z-score_p_40,12.42,20.695368,4.114,0.237975,1.91079,0.085206,15,3


In [6]:
# Stack the per-(lag, var) summary frames row-wise into one long table.
out_all_df = pd.concat(out_all, axis=0)

In [7]:
# Persist the combined table (row labels are written as the first CSV column),
# then display it.
out_all_df.to_csv(path_or_buf='results/results.csv')
out_all_df

Unnamed: 0,mu(mean),std(mean),MAE(mean),mu(std),std(std),MAE(std),lag,var
min-max,7.0411,38.491792,3.3252,0.254727,2.926045,0.192717,5,1
min-max_p_10,14.8556,33.032212,10.1372,0.412961,1.192325,0.381979,5,1
min-max_p_20,12.2532,66.790092,7.8160,0.218627,3.780614,0.198404,5,1
min-max_p_30,5.5380,4.962852,1.7564,0.591696,1.868927,0.484272,5,1
min-max_p_40,6.4214,10.228630,2.0714,0.091608,0.533675,0.053442,5,1
...,...,...,...,...,...,...,...,...
z-score,14.9497,9.439623,2.0230,0.210564,2.096249,0.249075,15,3
z-score_p_10,13.0356,41.041360,5.3132,0.194352,2.214085,0.177093,15,3
z-score_p_20,12.7084,27.357176,4.4728,0.300322,0.816626,0.132560,15,3
z-score_p_30,13.9586,21.795050,3.8590,0.347327,3.222836,0.281078,15,3


In [8]:
# Move the row labels out of the index into a regular column named 'model'
# so the pivot tables below can use it as their row key.
out_all_df = out_all_df.reset_index().rename(columns={'index': 'model'})

In [9]:
# 'mu(mean)' per model (rows, arranged in `order`) and per (lag, var)
# setting (two-level columns); saved to CSV and displayed.
results = (
    out_all_df
    .pivot_table(values='mu(mean)', index='model', columns=['lag', 'var'])
    .loc[order]
)
results.to_csv('results/results_mu.csv')
results

lag,5,5,5,10,10,10,15,15,15
var,1,2,3,1,2,3,1,2,3
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
raw,7.05205,10.38315,5.7812,9.13025,13.3655,11.8822,13.6905,14.40835,14.95455
min-max_p_10,14.8556,11.9404,16.9176,19.6628,14.6604,15.0178,16.626,17.0244,14.6664
min-max_p_20,12.2532,13.8438,6.5004,13.8524,14.8748,10.5186,14.2798,15.4604,12.7226
min-max_p_30,5.538,9.7778,4.1386,9.3062,10.6886,8.8836,13.5742,15.0734,11.8494
min-max_p_40,6.4214,6.6378,6.2612,13.0098,13.0546,10.5366,16.5602,16.6404,17.0676
min-max,7.0411,10.3983,5.7681,9.13445,13.3689,11.88085,13.6915,14.40045,14.9398
z-score_p_10,8.1292,9.7272,10.6994,11.8494,12.829,10.971,14.5502,14.555,13.0356
z-score_p_20,7.9632,10.79,7.5674,12.2664,13.1162,10.747,14.2668,14.5588,12.7084
z-score_p_30,8.1492,12.1714,6.8792,11.5344,14.0974,10.4662,15.2128,17.461,13.9586
z-score_p_40,6.6598,12.1428,7.321,12.453,11.915,10.1808,14.94,16.1598,12.42


In [10]:
# 'std(mean)' per model (rows, arranged in `order`) and per (lag, var)
# setting (two-level columns); saved to CSV and displayed.
results = (
    out_all_df
    .pivot_table(values='std(mean)', index='model', columns=['lag', 'var'])
    .loc[order]
)
results.to_csv('results/results_std.csv')
results

lag,5,5,5,10,10,10,15,15,15
var,1,2,3,1,2,3,1,2,3
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
raw,38.629152,52.715392,6.95266,6.219948,40.735003,11.567,8.320958,4.62399,9.490621
min-max_p_10,33.032212,27.054012,39.088468,25.008444,37.106032,43.497758,18.871056,39.302184,47.858756
min-max_p_20,66.790092,39.799762,28.202612,37.18326,24.756244,13.190982,9.891462,24.718132,10.846654
min-max_p_30,4.962852,56.496686,7.030702,4.33029,15.744914,11.217548,10.17261,12.913686,14.134498
min-max_p_40,10.22863,15.200518,7.159684,17.588398,9.072298,3.212006,15.097578,17.333568,14.294036
min-max,38.491792,52.714197,6.902539,6.236303,40.726609,11.54002,8.352984,4.588691,9.405767
z-score_p_10,42.672384,53.18602,46.170794,23.686606,38.396338,37.027338,24.208454,43.069738,41.04136
z-score_p_20,30.773528,55.87828,26.031974,23.753764,38.576222,31.415974,35.654628,27.729828,27.357176
z-score_p_30,29.559648,66.750174,21.159232,12.431188,12.230646,11.46597,9.86228,16.855606,21.79505
z-score_p_40,7.435166,63.993916,22.08555,21.441094,15.009718,10.168808,21.089752,32.854842,20.695368


In [11]:
# 'MAE(mean)' per model (rows, arranged in `order`) and per (lag, var)
# setting (two-level columns); saved to CSV and displayed.
results = (
    out_all_df
    .pivot_table(values='MAE(mean)', index='model', columns=['lag', 'var'])
    .loc[order]
)
results.to_csv('results/results_MAE.csv')
results

lag,5,5,5,10,10,10,15,15,15
var,1,2,3,1,2,3,1,2,3
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
raw,3.33655,6.19155,1.5417,1.49405,4.8514,2.7027,1.8247,1.48035,2.02985
min-max_p_10,10.1372,7.5084,12.0684,10.0508,6.4896,6.7894,3.4316,5.8632,5.6768
min-max_p_20,7.816,9.1826,3.6396,4.9944,5.808,2.8134,1.9618,3.9648,3.3334
min-max_p_30,1.7564,5.8486,2.349,1.6594,2.9702,2.6308,2.3442,2.6602,3.921
min-max_p_40,2.0714,2.717,2.302,3.7722,3.315,1.4282,3.077,3.386,3.7836
min-max,3.3252,6.2024,1.5356,1.49445,4.8493,2.69935,1.8287,1.47675,2.0196
z-score_p_10,4.2164,6.1204,6.7174,3.3538,5.3382,5.1086,3.0486,5.539,5.3132
z-score_p_20,3.3856,6.9764,3.9994,3.572,5.7998,4.3534,4.1592,4.0364,4.4728
z-score_p_30,3.6868,8.0514,3.6792,2.228,4.5578,2.4366,1.5696,3.9338,3.859
z-score_p_40,1.7866,7.6792,3.781,3.3646,3.5022,2.3672,2.7104,4.785,4.114


In [12]:
# Absolute difference between the 'mu(mean)' column and the 'lag' setting.
out_all_df['Err'] = (out_all_df['mu(mean)'] - out_all_df['lag']).abs()

In [13]:
# 'Err' per model (rows, arranged in `order`) and per (lag, var)
# setting (two-level columns); saved to CSV and displayed.
results = (
    out_all_df
    .pivot_table(values='Err', index='model', columns=['lag', 'var'])
    .loc[order]
)
results.to_csv('results/results_Err.csv')
results

lag,5,5,5,10,10,10,15,15,15
var,1,2,3,1,2,3,1,2,3
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
raw,2.05205,5.38315,0.7812,0.86975,3.3655,1.8822,1.3095,0.59165,0.04545
min-max_p_10,9.8556,6.9404,11.9176,9.6628,4.6604,5.0178,1.626,2.0244,0.3336
min-max_p_20,7.2532,8.8438,1.5004,3.8524,4.8748,0.5186,0.7202,0.4604,2.2774
min-max_p_30,0.538,4.7778,0.8614,0.6938,0.6886,1.1164,1.4258,0.0734,3.1506
min-max_p_40,1.4214,1.6378,1.2612,3.0098,3.0546,0.5366,1.5602,1.6404,2.0676
min-max,2.0411,5.3983,0.7681,0.86555,3.3689,1.88085,1.3085,0.59955,0.0602
z-score_p_10,3.1292,4.7272,5.6994,1.8494,2.829,0.971,0.4498,0.445,1.9644
z-score_p_20,2.9632,5.79,2.5674,2.2664,3.1162,0.747,0.7332,0.4412,2.2916
z-score_p_30,3.1492,7.1714,1.8792,1.5344,4.0974,0.4662,0.2128,2.461,1.0414
z-score_p_40,1.6598,7.1428,2.321,2.453,1.915,0.1808,0.06,1.1598,2.58
