In [None]:
import sys
sys.path.append("../libs/")

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from utils import shift_data, print_hp,print_line, graficarTodo, split_df, graficarClases, plot_metrics, plot_cm, plot_roc, plot_prc, plot_probs
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import Constant
from tensorflow.keras.optimizers import Adam
import datetime
import os
import keras_tuner as kt
from keras_tuner import HyperModel
import keras.metrics as metrics
from keras.models import Sequential, load_model, model_from_json
from keras.layers import Dense, Flatten, Dropout
from keras.losses import BinaryCrossentropy
from keras.utils.vis_utils import plot_model

# Carga de datos

In [None]:
# Read ../data/PERU_DATA_DIFF.csv, parse the DATE column as datetimes and use it as the index.
df = (
    pd.read_csv('../data/PERU_DATA_DIFF.csv')
    .assign(DATE=lambda raw: pd.to_datetime(raw['DATE']))
    .set_index('DATE')
)

In [None]:
# Print the frame's columns, dtypes and non-null counts.
df.info()

In [None]:
# Display per-column summary statistics (the describe() table is the cell output).
df.describe()

In [None]:
# Plot the dataset with the project helper; 'Features' is presumably the figure title —
# TODO confirm against utils.graficarTodo.
graficarTodo(df,'Features')

# Mostrar desbalance de clases

In [None]:
# Count examples per class. The two-way unpack only works when 'Class' contains
# exactly the labels 0 and 1 (bincount then returns two counts).
# NOTE(review): the counts cover the whole dataset, because the train/test split
# happens in a later cell.
class_counts = np.bincount(df['Class'])
neg, pos = class_counts
total = neg + pos

summary_template = 'Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'
positive_pct = 100 * pos / total
print(summary_template.format(total, pos, positive_pct))

In [None]:
# Plot the target column with the project helper (presumably the class distribution —
# TODO confirm against utils.graficarClases).
graficarClases(df['Class'])

In [None]:
# Log-odds of the positive class, log(pos / neg), stored as a one-element array.
# NOTE(review): no later cell visible here reads initial_bias — presumably intended
# as an output-layer bias initializer; confirm or remove.
initial_bias = np.log([pos/neg])

In [None]:
# Inverse-frequency class weights. Scaling by total / 2 keeps the loss at a similar
# magnitude: summed over all examples, the weights still add up to `total`.
half_total = total / 2.0
weight_for_0, weight_for_1 = ((1 / count) * half_total for count in (neg, pos))

class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(class_weight[0]))
print('Weight for class 1: {:.2f}'.format(class_weight[1]))

# Dividir Datos

In [None]:
# Every column other than the target is used as a model input.
target_col = ['Class']
is_feature = df.columns != target_col[0]
features = df.columns[is_feature]

# Split into train and test frames; 0.3 is presumably the test fraction —
# TODO confirm against utils.split_df.
df_train, df_test = split_df(df, 0.3)

# Normalizar

In [None]:
# Targets are only copied; features are standardised and clipped below.
df_y_train = df_train[target_col].copy()
df_y_test = df_test[target_col].copy()

# Fit the scaler on the training features only, then reuse it for the test set
# so no test statistics leak into the transformation.
scaler = StandardScaler()
scaled_train = scaler.fit_transform(df_train[features])
scaled_test = scaler.transform(df_test[features])

# Build fresh float frames instead of writing through `.iloc[:, :] = ...`:
# in-place assignment depends on the existing column dtypes (an integer column
# would hit pandas' deprecated upcasting path), whereas a new frame always
# takes the dtype of the scaled values. Values are clipped to [-5, 5] standard
# deviations to limit the influence of outliers.
df_x_train = pd.DataFrame(
    np.clip(scaled_train, -5, 5), index=df_train.index, columns=features
)
df_x_test = pd.DataFrame(
    np.clip(scaled_test, -5, 5), index=df_test.index, columns=features
)

# Fine Tuning

## Modelo

In [None]:
# Metrics attached to every compiled model.
# Order matters: entrenar() reads the evaluate() results by position (index 9 is
# 'prc', after the loss at index 0) and writes a CSV header in this same order.
METRICS = [
      metrics.TruePositives(name='tp'),
      metrics.FalsePositives(name='fp'),
      metrics.TrueNegatives(name='tn'),
      metrics.FalseNegatives(name='fn'), 
      metrics.BinaryAccuracy(name='accuracy'),
      metrics.Precision(name='precision'),
      metrics.Recall(name='recall'),
      metrics.AUC(name='auc'),
      metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

In [None]:
def fine_tuning_model(base_model_path, hp, n_features):
    """Load a saved base model and recompile it for fine-tuning.

    Args:
        base_model_path: Path of the saved Keras model to start from.
        hp: Not used by this function; kept so existing callers keep working.
        n_features: Not used by this function; kept for the same reason.

    Returns:
        The loaded model, recompiled with Adam (learning rate 1e-3), binary
        cross-entropy loss and the module-level ``METRICS``. No layer's
        ``trainable`` flag is modified here, so layers keep whatever setting
        was stored with the saved model.
    """
    model = load_model(base_model_path)
    optimizer = Adam(learning_rate=1e-3)
    model.compile(optimizer=optimizer, loss=BinaryCrossentropy(), metrics=METRICS)
    return model


## Variables

In [None]:
# Forecast horizons, one per trained model (passed to shift_data as n_steps_out).
n_steps_out = [1,6,12]
# Number of input columns (every column except the target).
n_features = len(features)
# Timestamp, to the minute, embedded in the output file names.
fecha_hora = datetime.datetime.now().strftime('%Y%m%d_%H%M')
# Upper bound on training epochs; the EarlyStopping callback in entrenar() may stop sooner.
n_epochs = 1000

In [None]:
# Path prefix (directory) under which trained models and metric CSVs are written.
output_path_model = 'modelos/'
# Prefix for this experiment's output file names.
name_prj = 'GRU_TL_PERU_'
# Number of fine-tuning runs per horizon; entrenar() keeps only the best one.
N = 20

In [None]:
# One hyper-parameter dict per forecast horizon, in the same order as n_steps_out;
# 'steps_in' is the input window length handed to shift_data.
hps = [{'steps_in': window} for window in (6, 6, 9)]

# Pre-trained base model used as the starting point for each horizon (same order).
base_models = [
    'modelos/GRU_USA_1_20221230_1705_19.h5',
    'modelos/GRU_USA_6_20221230_1705_0.h5',
    'modelos/GRU_USA_12_20221230_1717_8.h5',
]

In [None]:
def entrenar(name_prj, n_steps_out, base_model_path, hp):
    """Fine-tune the base model N times for one horizon and keep the best run.

    Each run reloads ``base_model_path`` through ``fine_tuning_model``, trains it
    with early stopping on ``val_prc``, saves it under ``output_path_model`` and
    appends its test-set metrics to a CSV file. The run with the highest test
    ``prc`` is kept, the other saved models are deleted, and diagnostic plots
    are drawn for the winner.

    Relies on notebook globals: ``df_x_train``, ``df_y_train``, ``df_x_test``,
    ``df_y_test``, ``output_path_model``, ``fecha_hora``, ``N``, ``n_epochs``,
    ``n_features``, ``class_weight`` and ``METRICS``.

    Args:
        name_prj: Prefix for the output file names.
        n_steps_out: Forecast horizon passed to ``shift_data``.
        base_model_path: Saved model that every run starts from.
        hp: Dict with key ``'steps_in'`` (input window passed to ``shift_data``).

    Returns:
        None.
    """
    # Use a separate local instead of overwriting the `name_prj` argument.
    run_name = name_prj + str(n_steps_out) + '_' + fecha_hora

    # --------------------- Training --------------------------------------
    n_steps_in = hp['steps_in']
    x_train, y_train = shift_data(df_x_train, df_y_train, n_steps_in, n_steps_out)
    x_test, y_test = shift_data(df_x_test, df_y_test, n_steps_in, n_steps_out)
    # The batch size covers the whole training array, so each epoch is a single batch.
    batch_learning = len(x_train)

    # evaluate() returns [loss, *metrics] in compile order, so the CSV header and
    # the position of 'prc' are derived from METRICS rather than hard-coded.
    metric_names = ['loss'] + [metric.name for metric in METRICS]
    prc_index = metric_names.index('prc')

    prcs = []
    models = []
    res_path = output_path_model + run_name + ".csv"
    print_line(','.join(metric_names) + '\n', res_path)

    print("Entrenando...")
    for i in range(N):
        name_model = output_path_model + run_name + '_' + str(i) + '.h5'
        gru_model = fine_tuning_model(base_model_path, hp, n_features)
        # Stop once val_prc has not improved for 50 epochs, then roll back to
        # the weights of the best epoch.
        es_t = EarlyStopping(
            monitor='val_prc',
            verbose=0,
            patience=50,
            mode='max',
            restore_best_weights=True)

        models.append(name_model)
        gru_model.fit(x=x_train, y=y_train, validation_split=0.5, epochs=n_epochs,
                      verbose=0, shuffle=False, callbacks=[es_t],
                      batch_size=batch_learning, class_weight=class_weight)

        gru_model.save(name_model)
        # --------------------- Evaluation --------------------------------------
        res = gru_model.evaluate(x_test, y_test,
                                 batch_size=batch_learning, verbose=0)
        print_line(','.join(str(value) for value in res) + '\n', res_path)
        prcs.append(res[prc_index])  # metric used to rank the runs

    # --------- Keep and evaluate the best run ------------------------
    # NOTE(review): the winner is chosen by its test-set prc, so the reported
    # test metrics are optimistically biased — consider ranking on validation data.
    print("Evaluando mejor...")
    best_index = int(np.argmax(prcs))
    best_model = models[best_index]
    print(f'\nMejor modelo: {best_model} con prc: {np.max(prcs)}\n')
    for i, model_path in enumerate(models):
        if i != best_index:
            os.remove(model_path)

    gru_model = load_model(best_model)
    train_predictions = gru_model(x_train)
    test_predictions = gru_model(x_test)

    plot_cm(y_test, test_predictions)
    plot_roc(y_train, train_predictions, y_test, test_predictions)
    plot_prc(y_train, train_predictions, y_test, test_predictions)
    plot_probs(y_train, train_predictions, 'Train')
    plot_probs(y_test, test_predictions, 'Test')

# Modelo para 1 mes a futuro

In [None]:
# Fine-tune base_models[0] for the first horizon (n_steps_out[0] = 1) using hps[0].
entrenar(name_prj, n_steps_out[0], base_models[0], hps[0])

# Modelo para 6 meses a futuro

In [None]:
# Fine-tune base_models[1] for the second horizon (n_steps_out[1] = 6) using hps[1].
entrenar(name_prj, n_steps_out[1], base_models[1], hps[1])

# Modelo para 12 meses a futuro

In [None]:
# Fine-tune base_models[2] for the third horizon (n_steps_out[2] = 12) using hps[2].
entrenar(name_prj, n_steps_out[2], base_models[2], hps[2])