In [None]:
import sys
sys.path.append("../libs/")

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from utils import shift_data, print_hp,print_line, graficarTodo, split_df, graficarClases, plot_metrics, plot_cm, plot_roc, plot_prc, plot_probs
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import Constant
from tensorflow.keras.optimizers import Adam
import datetime
import os
import keras_tuner as kt
from keras_tuner import HyperModel
import keras.metrics as metrics
from keras.models import Sequential, load_model
from keras.layers import Dense, Flatten, Dropout
from keras.losses import BinaryCrossentropy

# Carga de datos

In [None]:
# Load the raw CSV, parse the DATE column as datetimes and use it as the index.
df = pd.read_csv('../data/USA_DATA_DIFF.csv')
df = df.assign(DATE=pd.to_datetime(df['DATE'])).set_index('DATE')

In [None]:
# Print the index range, column dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Display summary statistics (count, mean, std, quartiles) of the numeric columns.
df.describe()

In [None]:
# Plot the frame with the project helper from utils, titled 'Features'.
# NOTE(review): presumably draws every column over time - confirm in utils.
graficarTodo(df,'Features')

# Mostrar desbalance de clases

In [None]:
# Count the samples of each class. np.bincount returns one count per integer
# value, so the two-name unpacking only works when 'Class' holds exactly the
# labels 0 (negative) and 1 (positive).
neg, pos = np.bincount(df['Class'])
total = neg + pos
# Report the dataset size and the share of positive samples.
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, 100 * pos / total))

In [None]:
# Visualise the 'Class' column with the project helper from utils.
# NOTE(review): presumably a class-distribution plot - confirm in utils.
graficarClases(df['Class'])

In [None]:
# Log-odds of the positive class, log(pos / neg), as a one-element array.
# It is only referenced in a commented-out `output_bias=initial_bias` argument
# further down, so it is currently not applied to any model.
initial_bias = np.log([pos/neg])

In [None]:
# Inverse-frequency class weights: the rarer a class, the larger its weight.
# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
weight_for_0 = (1 / neg) * (total / 2.0)
weight_for_1 = (1 / pos) * (total / 2.0)

# Mapping label -> weight, in the form Keras' `fit(class_weight=...)` expects.
class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

# Dividir Datos

In [None]:
# The target is the 'Class' column; every other column is used as a feature.
target_col = ['Class']
features = df.columns[(df.columns!=target_col[0])]
# Train/test split via the project helper.
# NOTE(review): 0.2 is presumably the test fraction and the split is presumably
# chronological (no shuffling) - confirm in utils.split_df.
df_train, df_test = split_df(df,0.2)

# Normalizar

In [None]:
# Separate features and target for each split; copies keep `df_train`/`df_test`
# untouched by the in-place assignments below.
df_x_train = df_train[features].copy()
df_y_train = df_train[target_col].copy()
df_x_test = df_test[features].copy()
df_y_test = df_test[target_col].copy()

scaler = StandardScaler()

# Fit the scaler on the training features only, then apply the same
# transformation to the test features so no test statistics leak into training.
df_x_train.iloc[:,:] = scaler.fit_transform(df_x_train)
df_x_test.iloc[:,:] = scaler.transform(df_x_test)

# Clip the standardised values to [-5, 5] to bound the influence of outliers.
df_x_train.iloc[:,:] = np.clip(df_x_train,-5,5)
df_x_test.iloc[:,:] = np.clip(df_x_test,-5,5)

# Construcción

## Modelo

In [None]:
# Metrics attached to every compiled model; this list is the default value of
# build_mlp's `metrics` argument. Each `name` is the key under which Keras
# reports the value (with a `val_` prefix on validation data), which is how the
# 'val_auc' tuning objective and early-stopping monitor are resolved.
# Note that the same metric instances are shared by every model built with the
# default argument.
METRICS = [
      metrics.TruePositives(name='tp'),
      metrics.FalsePositives(name='fp'),
      metrics.TrueNegatives(name='tn'),
      metrics.FalseNegatives(name='fn'), 
      metrics.BinaryAccuracy(name='accuracy'),
      metrics.Precision(name='precision'),
      metrics.Recall(name='recall'),
      metrics.AUC(name='auc'),
      metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

def build_mlp(n_steps_in: int, n_features: int, nodes: int,
              layers: int, dropout : float, learning_rate: float, activation=None,
              metrics = METRICS, output_bias = None):
    """Build and compile a binary-classification MLP over a flattened input window.

    Architecture: Flatten -> `layers` x Dense(`nodes`) -> Dropout -> Dense(1, sigmoid).
    Dropout is applied once, after the last hidden layer.

    Args:
        n_steps_in: Number of time steps in each input window.
        n_features: Number of features per time step.
        nodes: Units in every hidden Dense layer.
        layers: Number of hidden Dense layers.
        dropout: Dropout rate applied before the output layer.
        learning_rate: Learning rate for the Adam optimizer.
        activation: Activation of the hidden layers (None means linear).
        metrics: Keras metrics to track; defaults to the shared METRICS list.
        output_bias: Optional initial value for the output-layer bias. When
            None, the bias starts at zero.

    Returns:
        The compiled Keras `Sequential` model (binary cross-entropy loss).
    """
    # Dense uses a zero bias by default, but passing bias_initializer=None
    # explicitly overrides that default and makes Keras' add_weight fall back to
    # a random Glorot-uniform bias. Ask for zeros unless a bias was supplied.
    if output_bias is not None:
        bias_initializer = Constant(output_bias)
    else:
        bias_initializer = 'zeros'

    model = Sequential()
    model.add(Flatten(input_shape=(n_steps_in, n_features)))
    for _ in range(layers):
        model.add(Dense(nodes, activation=activation))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid', bias_initializer=bias_initializer))
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss=BinaryCrossentropy(), metrics=metrics)
    return model

## Optimizador

In [None]:
class MLPHyperModel(HyperModel):
    """Keras Tuner hypermodel that searches over MLPs created by `build_mlp`.

    The input window length ('steps_in') is itself a hyperparameter, so the
    data handed to the tuner is re-shaped inside `fit` for every trial.
    """

    def __init__(self, n_features_in, n_steps_out, output_bias=None,
                 class_weight=None, name=None, tunable=True):
        """Store the settings that stay fixed during the search.

        Args:
            n_features_in: Number of features per time step.
            n_steps_out: Forecast horizon forwarded to `shift_data`.
            output_bias: Optional initial output-layer bias for `build_mlp`.
            class_weight: Optional class-weight mapping forwarded to `model.fit`.
            name: Forwarded to `HyperModel.__init__`.
            tunable: Forwarded to `HyperModel.__init__`.
        """
        super().__init__(name=name, tunable=tunable)
        self.n_features = n_features_in
        self.n_steps_out = n_steps_out
        self.output_bias = output_bias
        self.class_weight = class_weight

    def build(self, hp):
        """Declare the search space and return a compiled model for `hp`."""
        # Tuned: window length, number of layers, nodes per layer, dropout rate
        # and learning rate (registered in this order).
        window = hp.Int('steps_in', min_value=4, max_value=24, step=1)
        depth = hp.Int('layers', min_value=1, max_value=5, step=1)
        width = hp.Int('nodes', min_value=32, max_value=356, step=16)
        drop_rate = hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)
        lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])
        # The activation is fixed to ReLU; tuning it over
        # ['relu','tanh','sigmoid'] is currently disabled.

        mlp_kwargs = dict(
            n_steps_in=window,
            n_features=self.n_features,
            nodes=width,
            layers=depth,
            dropout=drop_rate,
            learning_rate=lr,
            activation='relu',
            output_bias=self.output_bias,
        )
        return build_mlp(**mlp_kwargs)

    def fit(self, hp, model, x, y, **kwargs):
        """Re-shape the data for this trial's window length and train `model`.

        NOTE(review): `shift_data` is a project helper; it presumably builds
        (window, target) pairs from `x`/`y` - confirm in utils.
        """
        windowed_x, windowed_y = shift_data(
            x, y, hp.get('steps_in'), self.n_steps_out)
        # Full-batch learning: a single batch containing every sample.
        return model.fit(
            x=windowed_x,
            y=windowed_y,
            batch_size=len(windowed_x),
            class_weight=self.class_weight,
            **kwargs,
        )

In [None]:
# Forecast horizons (in time steps) for which a model is tuned and trained.
n_steps_out = [1,6,12,24]
# Number of input features per time step.
n_features = len(features)
# Tuner working directory; only referenced by a commented-out `directory`
# argument below, so it is currently unused.
ajuste_path = os.path.normpath('G:/')
# Timestamp appended to project/model names so runs do not overwrite each other.
fecha_hora = datetime.datetime.now().strftime('%Y%m%d_%H%M')
# The tuner maximises the validation AUC.
objective = kt.Objective('val_auc',direction='max')
# `max_epochs` is passed to the Hyperband tuner; `n_epochs` is passed as
# `epochs` to the tuner search and to the final training runs.
max_epochs = 30
n_epochs = 50

In [None]:
# Folder for tuning artefacts (hyperparameter report and untrained best model).
output_path = 'ajustes/'
# Folder for the trained models and the per-run metrics CSV.
output_path_model = 'modelos/'
# Prefix of every project/model name.
name_prj = 'MLP_USA_'
# Number of training runs performed after tuning.
N = 30


In [None]:
def optimizar_y_entrenar(name_prj, n_steps_out):
    """Tune, train and evaluate the MLP for one forecast horizon.

    Steps:
      1. Run a Hyperband search over `MLPHyperModel` on the training data.
      2. Write the hyperparameter report and save the untrained best model.
      3. Fit the best architecture `N` times, saving each model and appending
         its test metrics to a CSV.
      4. Reload the run with the highest test AUC, print its metrics and draw
         the confusion-matrix / ROC / PR / probability plots.

    Reads the notebook globals `fecha_hora`, `output_path`,
    `output_path_model`, `objective`, `max_epochs`, `n_epochs`, `N`,
    `n_features`, `class_weight` and the `df_x_*` / `df_y_*` frames.

    Args:
        name_prj: Prefix for the tuner project and every output file.
        n_steps_out: Forecast horizon forwarded to `shift_data`.

    Returns:
        None. Results are written to disk, printed and plotted.
    """
    name_prj = name_prj + str(n_steps_out) +'_'+fecha_hora
    name_model_tun = output_path+name_prj+'.h5'

    # Saving a model into a missing folder fails, so create both output folders.
    os.makedirs(output_path, exist_ok=True)
    os.makedirs(output_path_model, exist_ok=True)

    # Stopping condition: 10 epochs without improving on the best val_auc;
    # the weights of the best epoch are restored.
    es = EarlyStopping(
        monitor='val_auc',
        verbose=1,
        patience=10,
        mode='max',
        restore_best_weights=True)

    # --------------------- Optimisation --------------------------------------
    # The initial output bias (output_bias=initial_bias) is deliberately not used.
    mlp_hypermodel = MLPHyperModel(n_features, n_steps_out, class_weight = class_weight)

    # The tuner `directory` (ajuste_path) is disabled, so the default is used.
    mlp_tuner = kt.Hyperband(
        mlp_hypermodel,
        objective = objective,
        max_epochs = max_epochs,
        project_name = name_prj,
        overwrite=True)

    print("Optimizando...")
    mlp_tuner.search(x = df_x_train, y = df_y_train, validation_split = 0.3, epochs = n_epochs
        , verbose = 2, shuffle = False, callbacks = [es])

    # Save the hyperparameters of the best model.
    best_mlp_hps = mlp_tuner.get_best_hyperparameters(num_trials = 1)[0]
    print_hp(output_path+name_prj+'.txt',mlp_tuner)

    mlp_model = mlp_tuner.hypermodel.build(best_mlp_hps)
    mlp_model.save(name_model_tun) # untrained model
    print(best_mlp_hps.values)

    # --------------------- Training --------------------------------------
    n_steps_in = best_mlp_hps.get('steps_in')
    x_train, y_train = shift_data(df_x_train, df_y_train, n_steps_in, n_steps_out)
    x_test, y_test = shift_data(df_x_test, df_y_test, n_steps_in, n_steps_out)
    batch_learning = len(x_train)

    # Metrics are looked up by name. `evaluate` returns the loss first, so
    # positional indexing would shift every column by one (and pick recall
    # instead of AUC when selecting the best run).
    metric_cols = ['tp', 'fp', 'tn', 'fn', 'accuracy',
                   'precision', 'recall', 'auc', 'prc']

    aucs = []
    models = []
    res_path = output_path_model+name_prj+".csv"
    print_line(','.join(metric_cols) + '\n', res_path)

    # NOTE(review): the same model instance is re-fitted on every iteration, so
    # run i starts from the weights left by run i-1. Rebuild the model inside
    # the loop if the N runs are meant to be independent.
    for i in range(N):
        name_model = output_path_model+name_prj+'_'+str(i)+'.h5'
        models.append(name_model)
        print("Entrenando...")
        history = mlp_model.fit(x=x_train, y=y_train, validation_split = 0.3, epochs = n_epochs
            , verbose = 0, shuffle = False, callbacks = [es], batch_size = batch_learning, class_weight=class_weight)

        mlp_model.save(name_model)
        plot_metrics(history)

        # --------------------- Evaluation --------------------------------------
        print("Evaluando...")
        res = mlp_model.evaluate(x_test, y_test,
                                 batch_size=batch_learning, verbose=0,
                                 return_dict=True)
        print_line(','.join(str(res[col]) for col in metric_cols) + '\n', res_path)
        aucs.append(res['auc']) # metric used to compare the runs

    # --------- Evaluate the best run ------------------------
    best_model = models[np.argmax(aucs)]

    mlp_model = load_model(best_model)
    eval_results = mlp_model.evaluate(x_test, y_test,
                                      batch_size=batch_learning, verbose=0,
                                      return_dict=True)
    train_predictions = mlp_model(x_train)
    test_predictions = mlp_model(x_test)
    for name, value in eval_results.items():
        print(name, ': ', value)
    print()

    plot_cm(y_test, test_predictions)
    plot_roc(y_train, train_predictions, y_test, test_predictions)
    plot_prc(y_train, train_predictions, y_test, test_predictions)
    plot_probs(y_train, train_predictions, 'Train')
    plot_probs(y_test, test_predictions, 'Test')

# Modelo para 1 mes a futuro

In [None]:
# Tune, train and evaluate for the first horizon (n_steps_out[0] = 1 step ahead).
optimizar_y_entrenar(name_prj, n_steps_out[0])

# Modelo para 6 meses a futuro

In [None]:
# Tune, train and evaluate for the second horizon (n_steps_out[1] = 6 steps ahead).
optimizar_y_entrenar(name_prj, n_steps_out[1])

# Modelo para 12 meses a futuro

In [None]:
# Tune, train and evaluate for the third horizon (n_steps_out[2] = 12 steps ahead).
optimizar_y_entrenar(name_prj, n_steps_out[2])

# Modelo para 24 meses a futuro

In [None]:
# Tune, train and evaluate for the fourth horizon (n_steps_out[3] = 24 steps ahead).
optimizar_y_entrenar(name_prj, n_steps_out[3])

In [None]:
# #sys.path.append("../libs/")
# from notifications import enviar_correo
# enviar_correo("Ajuste de Parametros Finalizado!","Se ha completado: {}".format(name_prj))