In [1]:
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split

In [2]:
import matplotlib.pyplot as plt
def plot_history(history):
    """Plot the accuracy and loss curves recorded by a Keras training run.

    One figure is drawn per metric. Each figure shows the training curve and,
    when the run recorded one, the matching ``val_*`` curve.

    Args:
        history: object exposing a ``history`` dict that maps metric names to
            per-epoch value lists (as returned by ``model.fit``).
    """
    metrics = history.history
    # list all data in history
    print(metrics.keys())

    # Accuracy and loss live on different scales, so each gets its own figure
    # instead of sharing one axis under a single "loss" title.
    for metric, title in (('accuracy', 'model accuracy'), ('loss', 'model loss')):
        if metric not in metrics:
            # The model may have been compiled without this metric.
            continue

        plt.plot(metrics[metric])
        legend = ['train']

        val_key = 'val_' + metric
        if val_key in metrics:
            # Only present when validation data was supplied to fit().
            plt.plot(metrics[val_key])
            legend.append('test')

        plt.title(title)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(legend, loc='upper left')
        plt.show()

In [3]:
# Load the fitted feature scaler that was persisted at training time.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load 'std_scaler.pkl' if it comes from a trusted source.
with open('std_scaler.pkl', 'rb') as scaler_file:
    # The context manager closes the file handle even if unpickling fails.
    scaler = pickle.load(scaler_file)

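**Security note:** the scaler above is restored with `pickle`, which can execute arbitrary code while loading, so only use pickle files from a trusted source. See the scikit-learn notes on model persistence: https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations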


In [4]:
def prepare_data_tensorflow(data, n_targets=11, feature_scaler=None):
    """Split a DataFrame into scaled features and target columns.

    Column 0 is dropped, the last ``n_targets`` columns are returned as the
    targets, and everything in between is converted to float and passed
    through the scaler's ``transform``.

    Args:
        data: pandas DataFrame laid out as
            ``[ignored column, feature columns..., target columns...]``.
        n_targets: number of trailing columns that hold the targets
            (defaults to 11, the value previously hard-coded).
        feature_scaler: object with a ``transform(array)`` method. When
            omitted, the module-level ``scaler`` is used, as before.

    Returns:
        Tuple ``(X, y)``: ``X`` is whatever ``transform`` returns for the
        float feature array; ``y`` is the DataFrame slice of target columns.

    Raises:
        ValueError: if ``n_targets`` is smaller than 1 (a zero offset would
            silently select every column as a target).
    """
    if n_targets < 1:
        raise ValueError("n_targets must be at least 1")

    if feature_scaler is None:
        # Fall back to the notebook-level scaler loaded from disk.
        feature_scaler = scaler

    targets = data.iloc[:, -n_targets:]
    features = data.iloc[:, 1:-n_targets]
    X = feature_scaler.transform(np.array(features, dtype=float))
    return X, targets

In [5]:
# NOTE: despite its name, `test` holds the *training* CSV here; the name is
# kept so that any cell relying on it keeps working.
test = pd.read_csv('train.csv')
X, y = prepare_data_tensorflow(test)

# A fixed random_state makes the train/validation split identical across
# kernel restarts, so results from different runs can be compared.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(X_train.shape, y_train.shape)

(48680, 26) (48680, 11)


In [6]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, LeakyReLU, Conv1D, Conv2D, Flatten, MaxPooling2D, Input
from tensorflow.keras.layers import BatchNormalization, InputLayer, Reshape, Activation, GlobalAveragePooling1D, Normalization
from tensorflow.keras.layers import AveragePooling2D, AveragePooling1D, UpSampling1D, UpSampling2D, MaxPooling1D

In [7]:
from tensorflow.keras.optimizers import Adam
def get_optimizador():
    """Return a new Adam optimizer configured with a learning rate of 1e-5."""
    return Adam(learning_rate=1e-5)

In [8]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, RemoteMonitor, TerminateOnNaN, BackupAndRestore
from livelossplot import PlotLossesKeras

def get_callbacks(name="model"):
    """Build the list of Keras callbacks used for a training run.

    The callbacks monitor validation metrics, so the ``fit`` call must be
    given validation data (the checkpoint file name already requires
    ``val_accuracy``).

    Args:
        name: model identifier; used as the checkpoint file prefix and as the
            sub-directory of ``backup/`` holding this model's resume state.

    Returns:
        List with, in order: EarlyStopping, ReduceLROnPlateau,
        ModelCheckpoint, TerminateOnNaN and BackupAndRestore instances.
    """
    # Stop once validation accuracy has not improved for `patience` epochs and
    # roll back to the best weights seen. Monitoring the validation metric
    # (rather than the training one) is what lets this react to overfitting.
    early_stop = EarlyStopping(
        monitor='val_accuracy',
        patience=8,  # epochs without improvement tolerated before stopping
        min_delta=0,  # any increase, however small, counts as an improvement
        restore_best_weights=True,
        mode='max')

    # Divide the learning rate by 10 when validation loss plateaus.
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,
        patience=5,
        min_delta=1e-4,
        mode='min',
        verbose=1,
    )

    # Save the full model every time validation accuracy reaches a new best;
    # the score is embedded in the file name so the best run is easy to find.
    checkpointer = ModelCheckpoint(
        filepath='models_backup/' + name +'-{val_accuracy:.4f}.h5',
        monitor='val_accuracy',
        verbose=1,
        mode='max',
        save_best_only=True,
        save_weights_only=False
    )

    # Each model name gets its own backup directory. With a single shared
    # directory, state left behind by an interrupted run of one architecture
    # would be restored into the next, different, model.
    backup_restore = BackupAndRestore(backup_dir="backup/" + name)

    # Abort training as soon as the loss becomes NaN.
    proteccion_nan_loss = TerminateOnNaN()

    callbacks_list = [early_stop, reduce_lr, checkpointer, proteccion_nan_loss, backup_restore]

    return callbacks_list
    

In [9]:
def testear_modelo(model):
    """Evaluate `model` on 'test.csv' and print the resulting accuracy.

    Args:
        model: compiled model whose ``evaluate`` returns ``(loss, accuracy)``.
    """
    # Visual separator from whatever was printed before.
    print("\n"*5)
    print("*"*20)

    features, targets = prepare_data_tensorflow(pd.read_csv('test.csv'))

    # Only the accuracy is reported; the loss is discarded.
    _, test_acc = model.evaluate(features, targets)

    print('test_acc: ', test_acc)
    

def re_train(model, name="model"):
    """Fit `model` on the notebook-level training split.

    Uses the global ``X_train``/``y_train`` for training and
    ``X_test``/``y_test`` as validation data, for up to 500 epochs with the
    callbacks from ``get_callbacks(name)``. The batch size is left at the
    Keras default.

    Args:
        model: compiled Keras model.
        name: identifier forwarded to ``get_callbacks``.

    Returns:
        Tuple ``(history, model)``: the object returned by ``fit`` and the
        same model instance that was passed in.
    """
    fit_options = dict(
        validation_data=(X_test, y_test),
        epochs=500,
        verbose=1,
        callbacks=get_callbacks(name),
    )
    history = model.fit(X_train, y_train, **fit_options)
    return history, model


def show_result(history):
    """Display the training curves for a finished run.

    Args:
        history: history object accepted by ``plot_history``.
    """
    # plot_history draws the figures itself and returns nothing, so it is
    # called directly; wrapping it in print() only emitted a stray "None".
    plot_history(history)

def evaluate(model):
    """Print the model's accuracy on the held-out split of the training CSV.

    Evaluates on the notebook-level ``X_test``/``y_test`` (the portion set
    aside by ``train_test_split``), not on the data the model was fitted on.

    Args:
        model: compiled model whose ``evaluate`` returns ``(loss, accuracy)``.
    """
    _, val_acc = model.evaluate(X_test, y_test)
    # The label says "validation" because X_test/y_test are held-out samples,
    # not training samples.
    print('Evaluate validation acc: ', val_acc)
    

def train_evaluate_model(model_train, name="model"):
    """Compile, train, report on and save a model.

    Steps, in order: compile with the optimizer from ``get_optimizador`` and a
    mean-squared-error loss, train through ``re_train``, plot the history,
    evaluate on the held-out split and on 'test.csv', then save the model to
    ``models/model_<name>.h5``.

    Args:
        model_train: uncompiled Keras model.
        name: identifier used for the callbacks and the saved file name.

    Returns:
        The same model instance, trained.
    """
    spacer = "\n"*5

    # NOTE(review): MSE is paired with an accuracy metric here; the original
    # inline comment listed categorical_crossentropy and
    # sparse_categorical_crossentropy as alternatives. Confirm the intended loss.
    model_train.compile(
        optimizer=get_optimizador(),
        loss='mean_squared_error',
        metrics=['accuracy'],
    )

    # re_train hands back the same model object, so only the history is kept.
    history, _ = re_train(model_train, name)
    show_result(history)

    print(spacer)
    print("*"*20)

    evaluate(model_train)
    testear_modelo(model_train)

    model_train.save("models/model_" + name + ".h5")
    print(spacer)

    return model_train

In [10]:
def modelo_basico(cantidad_entradas, cantidad_salidas):
    """Build the "RedBasica" fully connected network.

    Args:
        cantidad_entradas: number of input features.
        cantidad_salidas: number of units in the softmax output layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Input layer followed by the widening/narrowing stack of ReLU layers.
    layers = [Dense(64, activation='relu', input_shape=(cantidad_entradas,))]
    layers.extend(
        Dense(units, activation='relu') for units in (128, 256, 128, 64, 32)
    )

    # Dropout sits just before the last hidden layer.
    layers.append(Dropout(0.5))
    layers.append(Dense(16, activation='relu'))
    layers.append(Dense(cantidad_salidas, activation='softmax', name='output_layer'))

    return Sequential(layers, name="RedBasica")





# Size the basic network from the training split: one input per feature
# column and one output per target column.
model = modelo_basico(
    cantidad_entradas=X_train.shape[1],
    cantidad_salidas=y_train.shape[1],
)
model.summary()

# Compile, train, evaluate and save under the "basico" name.
model = train_evaluate_model(model, name="basico")

Model: "RedBasica"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                1728      
                                                                 
 dense_1 (Dense)             (None, 128)               8320      
                                                                 
 dense_2 (Dense)             (None, 256)               33024     
                                                                 
 dense_3 (Dense)             (None, 128)               32896     
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dense_5 (Dense)             (None, 32)                2080      
                                                                 
 dropout (Dropout)           (None, 32)                0 

ValueError: Shapes (128,) and (64,) are incompatible

In [None]:
# Run five further training rounds on the already-trained basic model,
# plotting the curves and printing the held-out accuracy after each one.
for _ in range(5):
    history, model = re_train(model, name="basico_r")
    show_result(history)
    evaluate(model)

In [None]:
def modelo_simple(cantidad_entradas, cantidad_salidas):
    """Build the "Redsimple" 1-D convolutional network.

    A dense layer projects the input to 32 values, which are reshaped to
    (8, 4) and upsampled by a factor of 3. Five stages of same-padded Conv1D
    layers follow, each closed by a stride-1 max-pool, and then a dense head
    with dropout, global average pooling and a softmax output.

    Args:
        cantidad_entradas: number of input features.
        cantidad_salidas: number of units in the softmax output layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    def conv(filters):
        # Every convolution shares kernel size, padding and activation.
        return Conv1D(filters, kernel_size=3, padding="same", activation='relu')

    def pool(size):
        # Every pooling layer uses stride 1 with 'valid' padding.
        return MaxPooling1D(pool_size=size, strides=1, padding='valid')

    # (filters of each consecutive conv layer, pool size closing the stage)
    conv_stages = (
        ((12, 12, 12), 3),
        ((24, 24, 32), 2),
        ((32, 32), 3),
        ((24, 24, 16), 3),
        ((16, 8), 2),
    )

    model = Sequential(name="Redsimple")  # model names must not contain spaces

    # Stem: project the flat input so it can be viewed as a short sequence.
    model.add(Dense(8 * 4, activation="relu", input_shape=(cantidad_entradas,)))
    model.add(Reshape((8, 4)))
    model.add(UpSampling1D(size=3))

    for filter_counts, pool_size in conv_stages:
        for filters in filter_counts:
            model.add(conv(filters))
        model.add(pool(pool_size))

    # Head: dense layers, dropout, then collapse the sequence axis.
    model.add(Dense(24, activation="relu"))
    model.add(Dense(16, activation="relu"))
    model.add(pool(2))

    model.add(Dropout(0.5))

    for units in (16, 8):
        model.add(Dense(units, activation="relu"))

    model.add(GlobalAveragePooling1D())
    model.add(Flatten())

    for units in (8, 3):
        model.add(Dense(units, activation="relu"))

    model.add(Dense(cantidad_salidas, activation="softmax", name='output_layer'))

    return model

# Build the convolutional variant with the same input/output sizes as the
# training split, then compile, train, evaluate and save it as "simple".
model = modelo_simple(
    cantidad_entradas=X_train.shape[1],
    cantidad_salidas=y_train.shape[1],
)
model = train_evaluate_model(model, name="simple")

# Usando el dataset de test

In [None]:
# Saved model files, keyed by a short label.
json_modelos = {"82_9": "model82_9.h5", "76_6": "model76_6.h5", "now": "model.h5"}

from tensorflow.keras.models import load_model

# Load the model stored under the "now" label.
model = load_model(json_modelos['now'])

# Score it on the test CSV. Note that X and y are rebound here, replacing the
# arrays produced from the training CSV earlier in the notebook.
test = pd.read_csv('test.csv')
X, y = prepare_data_tensorflow(test)

evaluation = model.evaluate(X, y)
test_loss, test_acc = evaluation

print('test_acc: ', test_acc)

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
encoder = LabelEncoder()