In [None]:
#%xmode Verbose

In [None]:
import sys
sys.path.append("../libs/")

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.callbacks import EarlyStopping
from utils import shift_join_data,rmse,print_line, plot_pred
from keras.models import load_model
import datetime
from tensorflow.keras.optimizers import Adam

In [None]:
# --- Run configuration -------------------------------------------------------

# Path of the model with optimized hyperparameters (not yet trained).
model_path = 'resultados/Seq2Seq_20220731_1209_27.h5'

# Training budget.
n_epochs = 2000         # maximum number of epochs per fit
mini_batch = 32         # batch size
learning_rate = 1e-3    # Adam step size
n_iter = 30             # repetitions of the whole evaluation

# Stop a fit once the validation loss has not improved for 50 epochs and
# roll the model back to its best weights.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=50,
    restore_best_weights=True,
)

# Timestamped prefix shared by every artefact written during this run.
fecha_hora = datetime.datetime.now().strftime('%Y%m%d_%H%M')
res_name = 'resultados/Seq2Seq_TR_' + fecha_hora

# Carga de datos

In [None]:
# Read the prepared dataset and discard the 'country' column in one pass.
df = pd.read_csv('../data/wb_dataset_prep.csv').drop(columns='country')
# Distinct country codes, captured while 'iso' is still a regular column.
iso = df['iso'].unique()
# From here on every row is addressed by (country code, year).
df = df.set_index(['iso', 'year'])

Países

In [None]:
# Show the distinct country codes collected from the 'iso' column.
print(iso)

Features

In [None]:
# Summarize the indexed frame: column names, dtypes and non-null counts.
df.info()

# Dividir Datos

In [None]:
# Countries whose rows are used for training.
iso_train = ['PER']
# Column(s) to forecast.
target_col = ['rgdp_growth']
# Every remaining column is an input feature. All entries of target_col are
# excluded (not just the first), so adding a second target can never leak it
# into the feature set.
features = df.columns[~df.columns.isin(target_col)]

Países de Entrenamiento

In [None]:
# Show the country codes selected for training.
print(iso_train)

Features de Entrenamiento

In [None]:
# Show the columns used as model inputs (everything except the target).
print(features)

# Normalizar

In [None]:
# Feature and target rows of the training countries, still in raw units.
df_x = df.loc[iso_train][features]
df_y = df.loc[iso_train][target_col]

# One scaler per side; std_scaler_y is kept so predictions can be mapped back
# to the original units later.
std_scaler_x = StandardScaler()
std_scaler_y = StandardScaler()

# NOTE(review): both scalers are fitted on every row selected above, so any
# hold-out split taken afterwards is scaled with statistics that include it.

# Standardize, then clip to +/-3 standard deviations to damp outliers.
# The result is rebuilt as a fresh float frame (same index and columns)
# instead of being written back with ``.iloc[:, :] = ...``: an in-place write
# only works cleanly when every column is already float.
df_x = pd.DataFrame(
    np.clip(std_scaler_x.fit_transform(df_x), -3, 3),
    index=df_x.index,
    columns=df_x.columns,
)
# NOTE(review): the target is clipped too, so inverse-transforming it later
# yields values capped at mean +/- 3*sd rather than the raw observations.
df_y = pd.DataFrame(
    np.clip(std_scaler_y.fit_transform(df_y), -3, 3),
    index=df_y.index,
    columns=df_y.columns,
)

# Generar variables lag y horizonte

In [None]:
# Window sizes handed to shift_join_data.
# presumably: past steps per input window / future steps to predict -- confirm in utils
n_steps_in = 10
n_steps_out = 3
n_features = len(features)

x, y = shift_join_data(df_x,df_y,iso_train,n_steps_in,n_steps_out)

# Inputs and targets must stay aligned sample by sample.
assert len(x) == len(y), "shift_join_data returned x and y of different lengths"

# Chronological 80/20 split. An explicit split index is used because negative
# slicing breaks when the test size rounds down to zero: x[:-0] is empty and
# x[-0:] is the whole array, which would swap the two sets.
n_test = int(len(x) * 0.2)
split_at = len(x) - n_test
x_train, y_train = x[:split_at], y[:split_at]
x_test, y_test = x[split_at:], y[split_at:]


# Entrenamiento y evaluación

In [None]:
def evaluar_seq2seq_tr(x_train: np.ndarray, y_train: np.ndarray, x_test: np.ndarray,
        y_test: np.ndarray, n_iter: int, scaler: StandardScaler) -> int:
    """Repeatedly fit the saved model with time-series CV and score it on the test set.

    For each of ``n_iter`` repetitions, and for each of the 5 ``TimeSeriesSplit``
    folds of the training data, the model stored at ``model_path`` is loaded
    again, compiled with Adam / MSE and fitted with the fold's held-out part as
    validation data. Every fitted model is then scored on ``(x_test, y_test)``
    after mapping both targets and predictions back through ``scaler``, and is
    saved as ``<res_name>_<k>.h5`` where ``k`` counts fitted models from 0.
    The mean test RMSE of each repetition is passed to ``print_line`` together
    with ``<res_name>.csv``.

    The function reads the notebook-level settings ``model_path``, ``res_name``,
    ``learning_rate``, ``n_epochs``, ``mini_batch`` and ``es``.

    NOTE(review): the "best" model is the one with the lowest RMSE on the test
    set itself, so that RMSE is an optimistic estimate for the chosen model.

    Args:
        x_train: Training inputs, indexed by sample along the first axis.
        y_train: Training targets aligned with ``x_train``.
        x_test: Inputs used to score every fitted model.
        y_test: Normalized targets aligned with ``x_test``.
        n_iter: Number of times the whole cross-validation is repeated (>= 1).
        scaler: Fitted scaler used to map targets and predictions back to the
            original units.

    Returns:
        The index ``k`` of the saved model with the lowest test RMSE.

    Raises:
        ValueError: If ``n_iter`` is smaller than 1.
    """
    if n_iter < 1:
        # Without any fitted model there is nothing to rank; fail before
        # touching the results file instead of crashing in np.argmin later.
        raise ValueError('n_iter must be at least 1, got {}'.format(n_iter))

    n_splits = 5    # number of CV folds
    res_path = res_name+'.csv'
    tscv = TimeSeriesSplit(n_splits = n_splits)
    print_line("rmse\n",res_path)

    # The reference targets are the same for every model: de-normalize once.
    dn_y_test = scaler.inverse_transform(y_test)

    gl_rmse = list()    # test RMSE of every fitted model, in save order
    print('Inicio de evaluacion:')
    for i in range(n_iter):
        val_rmse = list()    # test RMSE of the models fitted in this repetition
        for train_idx, test_idx in tscv.split(x_train):
            # CV split
            x_t, y_t = x_train[train_idx], y_train[train_idx]
            x_v, y_v = x_train[test_idx], y_train[test_idx]
            # Start every fit from the stored model.
            # NOTE(review): one model is loaded per fold and none is released
            # explicitly -- watch memory on long runs.
            model = load_model(model_path)
            model.compile(optimizer=Adam(learning_rate=learning_rate),loss='mse')
            model.fit(x_t, y_t, validation_data = (x_v, y_v), epochs = n_epochs,
                batch_size = mini_batch, callbacks = [es], shuffle = False, verbose = 0)
            # Predict on the test inputs and keep the first two axes only.
            y_pred = model(x_test)
            y_pred = np.reshape(y_pred, (y_pred.shape[0], y_pred.shape[1]))
            # Back to original units, then score once and reuse the value.
            dn_y_pred = scaler.inverse_transform(y_pred)
            model_rmse = rmse(dn_y_test, dn_y_pred)
            val_rmse.append(model_rmse)
            # The file index equals this model's position in gl_rmse.
            model.save(res_name+'_'+str(len(gl_rmse))+'.h5')
            gl_rmse.append(model_rmse)
        # Mean over the folds of this repetition.
        mean_rmse = np.mean(val_rmse)
        print_line('{}\n'.format(mean_rmse),res_path)
        print('Iter: {}/{} completado.'.format(i+1,n_iter))
    print('Fin de evaluacion.')
    mejor_modelo = int(np.argmin(gl_rmse))
    print('Mejor modelo: {}'.format(mejor_modelo))
    return mejor_modelo

In [None]:
# Run the repeated cross-validated training and keep the index of the saved
# model that scored best.
nro_mejor = evaluar_seq2seq_tr(x_train, y_train, x_test, y_test,n_iter,std_scaler_y)

# Resultados

In [None]:
# Read back the per-repetition scores written during the evaluation run.
df_res = pd.read_csv(res_name+'.csv')

In [None]:
# Summary statistics of the recorded scores.
df_res.describe()

In [None]:
# Box plot of the recorded scores.
df_res.plot.box()

# Predicción

In [None]:

# Reload the saved model whose index was returned by evaluar_seq2seq_tr.
model = load_model(res_name+'_'+str(nro_mejor)+'.h5')
# Forward pass on the held-out inputs.
y_pred = model(x_test)
# Reshape to 2-D using the first two axis lengths; this only succeeds when any
# further axes have length 1 (assumes output is (samples, steps, 1) -- TODO confirm).
y_pred = np.reshape(y_pred, (y_pred.shape[0], y_pred.shape[1]))
# Map targets and predictions back through the target scaler.
dn_y_test = std_scaler_y.inverse_transform(y_test)
dn_y_pred = std_scaler_y.inverse_transform(y_pred)
# NOTE(review): 2021 is passed straight to plot_pred; presumably a reference
# year for the plot -- confirm in utils.
plot_pred(dn_y_test, dn_y_pred,2021)

In [None]:
# Report that the run identified by res_name has finished, using the project's
# notifications helper (presumably sends an e-mail -- confirm in notifications).
# NOTE(review): this import sits outside the notebook's main import cell.
from notifications import enviar_correo
enviar_correo("Evaluacion Finalizado!","Se ha completado: {}".format(res_name))