In [None]:
#%xmode Verbose

In [None]:
import sys
# Extend the module search path so the project-local imports below resolve.
# NOTE(review): presumably "../libs/" holds `utils`/`notifications` and the second
# entry holds the `dbn` package — confirm. Both paths are relative, so they are
# resolved against the kernel's current working directory.
sys.path.append("../libs/")
sys.path.append("../../../deep-belief-network/")

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from utils import shift_join_data,rmse,print_line, plot_pred, flatten
from dbn.models import SupervisedDBNRegression
import datetime

In [None]:
# Minute-resolution timestamp used to give each run its own output file names.
fecha_hora = '{:%Y%m%d_%H%M}'.format(datetime.datetime.now())
# Common path prefix for every artefact written by this run.
res_name = 'resultados/DBN_{}'.format(fecha_hora)
# Number of outer repetitions passed to the evaluation routine.
n_iter = 30

In [None]:
# Hyperparameters obtained through optimization
rbm_layers, rbm_nodes = 2, 112
# One entry per RBM layer, every layer with the same number of nodes.
h_layers_structure = [rbm_nodes] * rbm_layers
activation = 'tanh'
dropout = 0.2
rbm_learning_rate, bp_learning_rate = 1e-4, 1e-2
n_epochs = 20
n_iter_backprop = 200
mini_batch = 32

# Carga de datos

In [None]:
# Read the prepared dataset and discard the 'country' column.
df = pd.read_csv('../data/wb_dataset_prep.csv').drop(columns='country')
# Country codes, captured before 'iso' becomes part of the index.
iso = df['iso'].unique()
# Index every row by (country code, year).
df = df.set_index(['iso', 'year'])

Países

In [None]:
# Show the unique country codes found in the dataset.
print(iso)

Features

In [None]:
# Column names, dtypes and non-null counts of the indexed dataset.
df.info()

# Dividir Datos

In [None]:
# Countries held out for testing; every other country is used for training.
iso_test = ['PER']
# Exclude *every* held-out code, not only the first one, so that adding a second
# test country can never leak it into the training set.
iso_train = iso[~np.isin(iso, iso_test)]
# Columns to predict.
target_col = ['rgdp_growth']
# Every non-target column is a feature (original column order is preserved).
features = df.columns[~np.isin(df.columns, target_col)]
# Independent copy from which the test rows are sliced.
df_test = df.copy()


Países de Entrenamiento

In [None]:
# Show the country codes that remain for training after the hold-out.
print(iso_train)

Features de Entrenamiento

In [None]:
# Show the columns used as model inputs (all columns except the target).
print(features)

# Normalizar

In [None]:
# Slice feature / target frames for the training countries and the held-out countries.
df_x_train = df.loc[iso_train][features].copy()
df_y_train = df.loc[iso_train][target_col].copy()
df_x_test = df_test.loc[iso_test][features].copy()
df_y_test = df_test.loc[iso_test][target_col].copy()

# Scaling statistics are estimated from the TRAINING data only. Fitting separate
# scalers on the test country would (a) leak information from the evaluation
# period into preprocessing and (b) put train and test inputs on different scales.
std_scaler_x_train = StandardScaler()
std_scaler_y_train = StandardScaler()
# The *_test names are kept because later cells use them to de-normalize
# predictions; they now refer to the scalers fitted on the training data.
std_scaler_x_test = std_scaler_x_train
std_scaler_y_test = std_scaler_y_train

# Standardize, then clip to [-3, 3] to limit the influence of extreme values.
df_x_train.iloc[:,:] = np.clip(std_scaler_x_train.fit_transform(df_x_train), -3, 3)
df_y_train.iloc[:,:] = np.clip(std_scaler_y_train.fit_transform(df_y_train), -3, 3)
# Test frames are only transformed (never fitted) with the training statistics.
df_x_test.iloc[:,:] = np.clip(std_scaler_x_test.transform(df_x_test), -3, 3)
df_y_test.iloc[:,:] = np.clip(std_scaler_y_test.transform(df_y_test), -3, 3)

# Generar variables lag y horizonte

In [None]:
# Window sizes passed to shift_join_data.
# NOTE(review): presumably the lag length and the forecast horizon — confirm in utils.
n_steps_in = 10
n_steps_out = 3
n_features = len(features)

x_train, y_train = shift_join_data(df_x_train,df_y_train,iso_train,n_steps_in,n_steps_out)
x_test, y_test = shift_join_data(df_x_test,df_y_test,iso_test,n_steps_in,n_steps_out)

# Keep only the last 20% of the test windows. The count is computed once and
# shared so inputs and targets stay aligned, and a zero count is rejected
# explicitly: `seq[-0:]` is the WHOLE sequence, which would silently evaluate
# on every window instead of on the final 20%.
test_fraction = 0.2
n_test = int(len(x_test) * test_fraction)
if n_test == 0:
    raise ValueError(
        'Not enough test windows ({}) to hold out {:.0%} of them'.format(
            len(x_test), test_fraction))
x_test, y_test = x_test[-n_test:], y_test[-n_test:]

# Reshape the inputs with the project helper `flatten` before feeding the DBN.
x_train = flatten(x_train)
x_test = flatten(x_test)

# Entrenamiento y evaluación

In [None]:
def evaluar_dbn(x_train : np.ndarray, y_train : np.ndarray, x_test : np.ndarray,
        y_test : np.ndarray, n_iter : int, scaler : StandardScaler):
    """Train DBN regressors repeatedly and return the one with the lowest test RMSE.

    For each of the ``n_iter`` repetitions, one ``SupervisedDBNRegression`` is
    fitted per ``TimeSeriesSplit`` training subset of ``x_train``/``y_train``
    (the validation indices of each split are discarded). Every fitted model is
    scored on ``x_test``/``y_test`` after de-normalizing with ``scaler``, saved
    to ``<res_name>_<n>.pickle``, and the mean RMSE of each repetition is
    written to ``<res_name>.csv`` through ``print_line``.

    The network hyperparameters and ``res_name`` are read from module-level
    variables.

    NOTE(review): the returned model is selected by its RMSE on the test set,
    so that RMSE is an optimistic estimate — confirm this is intended.

    Args:
        x_train: Training inputs, indexable by the split index arrays.
        y_train: Training targets aligned with ``x_train``.
        x_test: Inputs every model is evaluated on.
        y_test: Normalized targets for ``x_test``.
        n_iter: Number of repetitions of the whole split loop.
        scaler: Fitted scaler whose ``inverse_transform`` maps targets and
            predictions back to the original scale.

    Returns:
        The fitted model with the lowest (non-NaN) RMSE.

    Raises:
        ValueError: If no model was trained (``n_iter`` < 1) or every RMSE is NaN.
    """
    n_splits = 5    # Number of time-series CV splits
    res_path = res_name+'.csv'
    tscv = TimeSeriesSplit(n_splits = n_splits)
    print_line("rmse\n",res_path)
    # The de-normalized targets do not depend on the model: compute them once.
    dn_y_test = scaler.inverse_transform(y_test)
    # Start
    gl_rmse = list()
    gl_models = list()
    nro = 0
    print('Inicio de evaluacion:')
    for i in range(n_iter):
        val_rmse = list()
        for train_idx, _ in tscv.split(x_train):
            # CV split (only the training indices are used)
            x_t, y_t = x_train[train_idx], y_train[train_idx]
            # Training
            model = SupervisedDBNRegression(
                        hidden_layers_structure = h_layers_structure,
                        learning_rate_rbm = rbm_learning_rate,
                        learning_rate = bp_learning_rate,
                        n_epochs_rbm = n_epochs,
                        n_iter_backprop = n_iter_backprop,
                        batch_size = mini_batch,
                        activation_function = activation,
                        dropout_p = dropout,
                        verbose = False)
            model.fit(x_t, y_t)
            # Prediction, mapped back to the original target scale
            dn_y_pred = scaler.inverse_transform(model.predict(x_test))
            # Evaluation (computed once, recorded per repetition and globally)
            fold_rmse = rmse(dn_y_test, dn_y_pred)
            val_rmse.append(fold_rmse)
            gl_rmse.append(fold_rmse)
            model.save(res_name+'_'+str(nro)+'.pickle')
            gl_models.append(model)
            nro += 1
        # Mean RMSE of this repetition
        mean_rmse = np.mean(val_rmse)
        print_line('{}\n'.format(mean_rmse),res_path)
        print('Iter: {}/{} completado.'.format(i+1,n_iter))
    print('Fin de evaluacion.')
    if not gl_models:
        raise ValueError('No model was trained: n_iter must be >= 1 (got {})'.format(n_iter))
    # nanargmin ignores NaN scores; plain argmin would return the index of the
    # first NaN and thereby pick a diverged model as the "best" one.
    mejor_modelo = gl_models[np.nanargmin(gl_rmse)]
    print('Mejor modelo: {}'.format(mejor_modelo))
    return mejor_modelo

In [None]:
# Run the full evaluation and keep the best-scoring model; predictions and
# targets are de-normalized inside evaluar_dbn with `std_scaler_y_test`.
mejor_modelo = evaluar_dbn(x_train, y_train, x_test, y_test,n_iter,std_scaler_y_test)

# Resultados

In [None]:
# Load the results CSV for this run (header "rmse", one mean RMSE per iteration;
# presumably written by print_line during the evaluation — confirm in utils).
df_res = pd.read_csv(res_name+'.csv')

In [None]:
# Summary statistics of the logged RMSE values.
df_res.describe()

In [None]:
# Box plot of the logged RMSE values.
df_res.plot.box()

# Predicción

In [None]:
# Predict the test windows with the selected model, then map both the true
# targets and the predictions back to the original scale.
y_pred = mejor_modelo.predict(x_test)
dn_y_test, dn_y_pred = (
    std_scaler_y_test.inverse_transform(scaled) for scaled in (y_test, y_pred)
)

In [None]:
# Plot the de-normalized targets against the de-normalized predictions.
# NOTE(review): 2021 is presumably a reference year for the plot axis — confirm in utils.plot_pred.
plot_pred(dn_y_test, dn_y_pred,2021)

In [None]:
# Send a completion notification naming this run's output prefix.
# NOTE(review): imported here rather than with the other imports, so a missing
# `notifications` module only fails after the whole evaluation has run.
from notifications import enviar_correo
enviar_correo("Evaluacion Finalizado!","Se ha completado: {}".format(res_name))