# Importing Libraries

In [1]:
import pandas as pd
from utils import common_functions 
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from keras.regularizers import l2
from tensorflow.keras.callbacks import Callback
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error

import mlflow
from mlflow.exceptions import RestException

from sklearn.preprocessing import MinMaxScaler
from BinanceUtil import BinanceUtil 
from datetime import datetime

# Iniciando funciones

In [2]:
# Build the shared helper object once and reuse its bound methods, instead of
# constructing a separate common_functions instance for every helper.
helpers = common_functions()
get_wt_coeff_inv = helpers.get_wt_coeff_inv    # wavelet coefficients + signals rebuilt from them
plot_inv_wv = helpers.plot_inv_wv              # plot of the rebuilt signals
create_sequences = helpers.create_sequences    # windowed (X, Y) builder

# When True, the close column is min-max scaled before any further processing.
standarize_data = False

# loading data

In [3]:
# Read the XRPUSDT CSV and add the rescaled close column in a single chain.
# The multiplier depends on the asset: earlier experiments used 1_000_000 for
# PEPE and 100 for DOGE; XRP uses 10. The column keeps the name 'closex1M'
# because the rest of the notebook refers to it.
df = (
    pd.read_csv('data/datos_XRPUSDT.csv', header=0)
    .assign(closex1M=lambda frame: frame['Close'] * 10)
)

In [4]:
# Columns kept for the rest of the notebook: the date column and the rescaled close.
select = ['Close time_date', 'closex1M']

if standarize_data:
    # Optional min-max scaling of the close column, stretched afterwards by 10.
    scaler = MinMaxScaler()
    scaled_close = scaler.fit_transform(df[['closex1M']])
    df['closex1M'] = scaled_close[:, 0] * 10

df_clean = df[select]

# Conexión al servidor de MLflow
También se crean las ejecuciones necesarias.


In [5]:
# Point the MLflow client at the remote tracking server.
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

# Create the experiment on first use, otherwise reuse the existing one.
# The experiment is looked up first instead of creating it and catching every
# RestException: a blanket except would treat unrelated server errors as
# "already exists" and then fail on `None.name` when the lookup finds nothing.
experiment_name = 'INV_WV_PLUS_MODELS'  # any name works as long as it does not clash with another experiment
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    experiment_id = mlflow.create_experiment(
        name=experiment_name,
        tags={'created_by': 'Victor Moreno'},  # record who created the experiment
    )
    print('Created experiment', experiment_name)
else:
    experiment_id = experiment.experiment_id
    print('Full name', experiment.name)

RESOURCE_ALREADY_EXISTS: Experiment 'INV_WV_PLUS_MODELS' already exists.
Full name INV_WV_PLUS_MODELS


## Creando la ejecucion

Se definen las variables iniciales necesarias para ejecuciones abajo

In [6]:
# Wavelet components used by this run and the names of the MLflow run hierarchy.
aproximacion = 'cA2'   # approximation component fed to the model (also the target column)
detalle = 'cD3'        # detail component fed to the model
parent_run = 'XRP'     # top-level run name
child_lv1 = 'LSTM'     # first-level child run name
child_lv2 = aproximacion+detalle  # second-level child run name, e.g. 'cA2cD3'


# Training hyper-parameters.
fn_activation = 'relu'
out_fn_activation = 'linear'
# (layer kind, units) pairs; the units are read when the model is built and the
# whole list is logged as a run parameter, so the kind labels must be spelled
# correctly (the third entry previously read 'LSMT').
layers = [('LSTM',100),('LSTM',50),('LSTM',25),('FULLY',1)]
epochs = 20
batch_size = 16
loss_metric = 'mse'
model_metrics = ['mae']
model_result_path = 'model_results.html'
optimizer = 'adam'

In [7]:
# Display the run-name hierarchy (parent, first-level child, second-level child) as a quick check.
parent_run, child_lv1, child_lv2

('XRP', 'LSTM', 'cA2cD3')

In [8]:
# Resume the existing parent and first-level child runs through their run ids
# and create a new second-level child run named after the coefficient pair.
# run_name is only passed the first time a run is created; afterwards the run
# is resumed via run_id.
with mlflow.start_run(experiment_id=experiment_id
                      ,run_id='263737daecc0412a9e6968477632369f'
                      ) as run_parent:
    with mlflow.start_run(experiment_id=experiment_id
                          ,run_id='52eb14f767ea4667b78b1ebd9b426bd8'
                          ,nested=True) as child_run1:
        with mlflow.start_run(experiment_id=experiment_id
                              ,run_name=child_lv2
                              ,nested=True) as child_run2:
            # No explicit mlflow.end_run() here: each `with` block ends its own
            # run on exit. An extra end_run() inside the block is redundant and,
            # depending on the MLflow version, may cause the exit handler to
            # close an enclosing run instead.
            print(f'Run creada para {child_lv2} con run_id',child_run2.info.run_id)


Run creada para cA2cD3 con run_id 2fa5a7a101c84c25be88966158b920af
🏃 View run cA2cD3 at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/2fa5a7a101c84c25be88966158b920af
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655
🏃 View run LSTM at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/52eb14f767ea4667b78b1ebd9b426bd8
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655
🏃 View run XRP at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/263737daecc0412a9e6968477632369f
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655


# Aplicando Transformada de Wavelet

In [9]:
# Work on the complete series: one value and one date per row of df_clean.
n = df_clean.shape[0]
data = np.array(df_clean['closex1M'][:n])
dates = df_clean['Close time_date'][:n]

# Settings shared by every decomposition; only the level differs between calls.
wavelet_settings = dict(signal=data, wavelet='db1', mode='symmetric', take=n)

# Each call yields the wavelet coefficients and the signals rebuilt from them.
coeffs_lv3, inv_coeffs_lv3 = get_wt_coeff_inv(level=3, **wavelet_settings)
coeffs_lv2, inv_coeffs_lv2 = get_wt_coeff_inv(level=2, **wavelet_settings)
coeffs_lv4, inv_coeffs_lv4 = get_wt_coeff_inv(level=4, **wavelet_settings)
coeffs_lv1, inv_coeffs_lv1 = get_wt_coeff_inv(level=1, **wavelet_settings)

## Visualizando la señal reconstruida

In [10]:
# HTML file that receives the decomposition plot.
wave_img_path = 'descomposicion_wavelet.html'

# Signals passed alongside the level-3 reconstruction for comparison: the
# original series and the level-2 approximation component.
external_signals = dict(real=data)
external_signals[aproximacion] = inv_coeffs_lv2[aproximacion]

plot_inv_wv(
    inv_coeffs=inv_coeffs_lv3,
    date_signal=dates,
    external_signals=external_signals,
    output_path=wave_img_path,
)

In [11]:
# Attach the decomposition HTML to the second-level child run under the 'resultados' artifact path.
mlflow.log_artifact(wave_img_path,run_id=child_run2.info.run_id,artifact_path='resultados')

# Entrenamiento

## Preparación de los datos

In [12]:
# Model inputs: the approximation component (from the level-2 decomposition)
# and the detail component (from the level-3 decomposition), one column each.
df_train = pd.DataFrame({
    aproximacion: inv_coeffs_lv2[aproximacion],
    detalle: inv_coeffs_lv3[detalle],
})
window_size = 24

# Windowed samples; the approximation column is the prediction target.
X, Y = create_sequences(df_train, window_size=window_size, target_col=aproximacion)

# Chronological split (shuffle=False): the first 70% is used for training and
# the remaining 30% is divided 90/10 into validation and test.
X_train, X_vt, y_train, y_vt = train_test_split(X, Y, shuffle=False, test_size=0.3)
x_val, x_test, y_val, y_test = train_test_split(X_vt, y_vt, shuffle=False, test_size=0.1)

# No reshape is applied: the arrays go to the LSTM exactly as create_sequences
# returns them (presumably already samples x time steps x features - TODO confirm).


## Creando modelo

In [13]:
class CustomEarlyStopping(Callback):
    """Stop training once validation MAE is under a threshold and not getting worse.

    Training is stopped at the end of an epoch when both conditions hold:

    * the current ``val_mae`` is strictly below ``threshold``, and
    * the ``val_mae`` of the previous epoch was greater than or equal to the
      current one (i.e. the metric did not get worse).

    The first epoch of a training run never stops training because there is no
    previous value to compare against. Epochs that report no ``val_mae`` (for
    example when ``fit`` is called without validation data) are ignored.

    Args:
        threshold: Upper bound that ``val_mae`` must fall below before training
            may be stopped.
    """

    def __init__(self, threshold=0.18):
        super().__init__()
        self.threshold = threshold
        # val_mae seen at the end of the previous epoch; None until one is recorded.
        self.previous_val_mae = None

    def on_train_begin(self, logs=None):
        """Forget the value of an earlier ``fit`` call so reusing the callback is safe."""
        self.previous_val_mae = None

    def on_epoch_end(self, epoch, logs=None):
        """Check the stopping condition and remember this epoch's ``val_mae``.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Metric dictionary for the epoch; may be None.
        """
        # `logs` defaults to None and 'val_mae' is absent without validation data.
        val_mae = (logs or {}).get('val_mae')
        if val_mae is None:
            return

        previous = self.previous_val_mae
        if previous is not None and val_mae < self.threshold and previous >= val_mae:
            print(f"\nStopping training at epoch {epoch + 1} as val_mae {val_mae:.4f} "
                  f"is below the threshold {self.threshold} and previous val_mae was "
                  f"{previous:.4f}.")
            self.model.stop_training = True

        # Store the current val_mae as the reference for the next epoch.
        self.previous_val_mae = val_mae

# Callback instance used for training; the val_mae threshold is set to 2
# instead of the constructor default of 0.18.
custom_callback = CustomEarlyStopping(threshold=2)


In [14]:
#from keras.layers import LeakyReLU
#fn_activation = LeakyReLU(0.5)

In [15]:
# Stacked LSTM regressor. Layer widths come from `layers`. The number of input
# features is read from the training array instead of being hard-coded to 2,
# so changing the input columns does not require editing this cell.
n_features = X_train.shape[-1]
model = Sequential([
    Input(shape=(window_size,n_features)),
    LSTM(layers[0][1],activation=fn_activation,return_sequences=True,recurrent_regularizer=l2(0.01)),
    LSTM(layers[1][1],activation=fn_activation,return_sequences=True,recurrent_regularizer=l2(0.1)),
    # Last recurrent layer returns only its final step, which feeds the dense output.
    LSTM(layers[2][1],activation=fn_activation,return_sequences=False,recurrent_regularizer=l2(0.01)),
    Dense(layers[3][1],activation=out_fn_activation)
])
model.compile(optimizer=optimizer
              ,loss=loss_metric
              ,metrics=model_metrics
              )

# Train the model; ti/tf bracket the fit call and are read later to log the
# training duration.
ti = datetime.now()
history = model.fit(X_train
                    ,y_train
                    ,epochs=epochs
                    ,batch_size=batch_size
                    ,validation_data=(x_val, y_val)
                    ,verbose=1
                    ,callbacks=[custom_callback]
                    )
tf = datetime.now()


Epoch 1/20
[1m699/699[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - loss: 3.7865 - mae: 0.5474 - val_loss: 2452.0381 - val_mae: 9.7318
Epoch 2/20
[1m699/699[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - loss: 0.2880 - mae: 0.0975 - val_loss: 92.7366 - val_mae: 5.4058
Epoch 3/20
[1m699/699[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - loss: 0.1354 - mae: 0.0808 - val_loss: 251.5313 - val_mae: 9.0862
Epoch 4/20
[1m699/699[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - loss: 0.0756 - mae: 0.0759 - val_loss: 608.4476 - val_mae: 13.8479
Epoch 5/20
[1m699/699[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - loss: 0.0442 - mae: 0.0662 - val_loss: 1160.5673 - val_mae: 19.0612
Epoch 6/20
[1m699/699[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - loss: 0.0293 - mae: 0.0649 - val_loss: 1539.1356 - val_mae: 21.8277
Epoch 7/20
[1m699/699[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [16]:
# Training vs. validation MAE per epoch, written to HTML and displayed.
fig = go.Figure()

mae_curves = (
    ('mae', 'train', 'orange'),
    ('val_mae', 'val', 'blue'),
)
for history_key, legend_label, curve_color in mae_curves:
    fig.add_trace(go.Scatter(
        x=history.epoch,
        y=history.history[history_key],
        mode='lines',
        name=legend_label,
        line=dict(color=curve_color),
    ))

fig.update_layout(title='Epochs vs MAE', xaxis_title="Epochs", yaxis_title="MAE")
fig.write_html('epochsVSmae.html')
fig.show()


In [17]:
# Attach the MAE-per-epoch HTML to the second-level child run under 'resultados'.
mlflow.log_artifact('epochsVSmae.html',run_id=child_run2.info.run_id,artifact_path='resultados')

In [18]:
# Training vs. validation loss per epoch, written to HTML and displayed.
fig = go.Figure()

loss_curves = (
    ('loss', 'train', 'magenta'),
    ('val_loss', 'val', 'red'),
)
for history_key, legend_label, curve_color in loss_curves:
    fig.add_trace(go.Scatter(
        x=history.epoch,
        y=history.history[history_key],
        mode='lines',
        name=legend_label,
        line=dict(color=curve_color),
    ))

fig.update_layout(title='Epochs vs Loss', xaxis_title="Epochs", yaxis_title="Loss")
fig.write_html('epochsVSloss.html')
fig.show()


In [19]:
# Attach the loss-per-epoch HTML to the second-level child run under 'resultados'.
mlflow.log_artifact('epochsVSloss.html',run_id=child_run2.info.run_id,artifact_path='resultados')

In [20]:
# Log the run configuration as MLflow params on the second-level child run.
# The training time uses total_seconds(): timedelta.seconds only holds the
# seconds component of the duration and silently drops whole days, so a
# training longer than 24 hours would be under-reported.
training_seconds = int((tf - ti).total_seconds())

parameters = {'data_standarizada':standarize_data
              ,'CoeficienteAproximacion':aproximacion
              ,'CoeficienteDetalle':detalle
              ,'puntos_entrenamiento':X_train.shape[0]
              ,'puntos_validacion':x_val.shape[0]
              ,'puntos_testeo':x_test.shape[0]
              ,'fn_activacion_por_capa':fn_activation
              ,'fn_activacion_salida':out_fn_activation
              ,'capas':layers
              ,'window_size':window_size
              ,'loss_metrics':loss_metric
              ,'model_metrics':model_metrics
              ,'epochs':epochs
              ,'batch_size':batch_size
              ,'optimizer':optimizer
              ,'tiempo_entrenamiento':training_seconds}

mlflow.log_params(params=parameters
                  ,run_id=child_run2.info.run_id)

In [21]:
# Predict every split (train, validation, test) and flatten each result to 1-D.
train_pred, val_pred, test_pred = (
    model.predict(split).reshape(-1) for split in (X_train, x_val, x_test)
)




[1m350/350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [22]:
# Infer the model signature from the training inputs and the test predictions.
# NOTE(review): test_pred was flattened with reshape(-1) when it was computed,
# so the inferred output schema is 1-D; confirm that this matches what the
# served model is expected to return.
signature = mlflow.models.infer_signature(X_train,test_pred)
# Re-open the second-level child run by id and log the Keras model under the
# 'model' artifact path together with the signature.
with mlflow.start_run(run_id=child_run2.info.run_id) as run_model:
    mlflow.keras.log_model(model,'model',signature=signature)

🏃 View run cA2cD3 at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/2fa5a7a101c84c25be88966158b920af
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655


In [23]:
# Train/test predictions grouped by split name.
train_test = {'train':train_pred,'test':test_pred}

# Date ranges for each split. Predictions start window_size rows into the
# series (assumes create_sequences targets the row that follows each window -
# TODO confirm); the splits are laid out one after another from there.
offset = window_size
train_end = offset + len(train_pred)
val_end = train_end + len(val_pred)
date_train = dates[offset:train_end]
date_val = dates[train_end:val_end]
date_test = dates[val_end:]

# Real series first, then one trace per split, each with its own colour.
fig = go.Figure()
plot_series = (
    (dates, data, 'real', 'green'),
    (date_train, train_pred, 'train', 'blue'),
    (date_val, val_pred, 'val', 'magenta'),
    (date_test, test_pred, 'test', 'red'),
)
for x_values, y_values, trace_name, trace_color in plot_series:
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='lines',
        name=trace_name,
        line=dict(color=trace_color),
    ))

fig.write_html(model_result_path)
fig.show()



In [24]:
# Attach the model-result plot (HTML) to the second-level child run under 'resultados'.
mlflow.log_artifact(model_result_path,run_id=child_run2.info.run_id,artifact_path='resultados')

# Guardando métricas

In [25]:

# Mean absolute error per split, collected directly into the dict that is
# logged to MLflow afterwards (key order: train, val, test).
metrics = {
    f'mae_{split_name}': mean_absolute_error(y_true=truth, y_pred=prediction)
    for split_name, truth, prediction in (
        ('train', y_train, train_pred),
        ('val', y_val, val_pred),
        ('test', y_test, test_pred),
    )
}
mae_train = metrics['mae_train']
mae_val = metrics['mae_val']
mae_test = metrics['mae_test']

print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

MAE train: 0.04136248259207257,   MAE val: 32.38945698508168,  MAE test: 92.28951530039237


In [26]:
# Store the train/val/test MAE values on the second-level child run.
mlflow.log_metrics(metrics=metrics,run_id=child_run2.info.run_id)

In [27]:
#mlflow.set_tags

In [28]:
#import mlflow
#logged_model = 'runs:/90d37badc60a433f99592d11d49a18ed/model'

# Load model as a PyFuncModel.
#loaded_model = mlflow.pyfunc.load_model(logged_model)
#m = loaded_model.get_raw_model()
#m.to_json()



In [29]:
#with open('model_json','w+') as f:
#    f.write(m.to_json())