# Importing Libraries

In [1]:
import pandas as pd
from utils import common_functions 
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from keras.regularizers import l2
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error

import mlflow
from mlflow.exceptions import RestException

from sklearn.preprocessing import MinMaxScaler
from BinanceUtil import BinanceUtil 
from datetime import datetime

# Iniciando funciones

In [2]:
get_wt_coeff_inv = common_functions().get_wt_coeff_inv
plot_inv_wv = common_functions().plot_inv_wv
create_sequences = common_functions().create_sequences





# loading data

In [3]:
df = pd.read_csv('data/datos_PEPEUSDT.csv',header=0)

#multipling by 1M the close data
df['closex1M'] = df['Close']*1000000

In [4]:
#seleccionando los campos que necesitamos
select = ['Close time_date','closex1M']
#select = ['Close time_date','closeNormalized']
df_clean = df[select]

# Conexion a Mlflow server
tambien se crean las ejecuciones necesarias


In [5]:
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

#Creacion o identificacion del experimento 
try:
    experiment_name = 'INV_WV_PLUS_MODELS' #Puede ser cualquiera siempre y cuando no se troque con otro
    experiment_id = (mlflow
                        .create_experiment(name=experiment_name
                                            ,tags={'created_by':'Victor Moreno'})) #importante poner el nombre de quien lo crea
except RestException as r:
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    print('Full name',experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'INV_WV_PLUS_MODELS' already exists.
Full name INV_WV_PLUS_MODELS


## Creando la ejecucion

Se definen las variables iniciales necesarias para ejecuciones abajo

In [6]:
aproximacion = 'cA2'
detalle = 'cD2'
parent_run = 'PEPE'
child_lv1 = 'LSTM'
child_lv2 = aproximacion+detalle


#variables para entrenar modelo
fn_activation = 'relu'
out_fn_activation = 'linear'
layers = [('LSTM',200),('LSTM',100),('LSTM',50),('LSMT',25),('FULLY',1)]#,('LSTM',300),('DENSE',1)]
epochs = 20
batch_size = 16
loss_metric = 'mse'
model_metrics = ['mae']
model_result_path = 'model_results.html'
optimizer = 'adam'

In [7]:
with mlflow.start_run(experiment_id=experiment_id
                      #,run_name=parent_run# El nombre se coloca solo la primera vez
                      ,run_id='42e6592c9dd141b99eec4768fea566be'
                      ) as run_parent:
    with mlflow.start_run(experiment_id=experiment_id
                          #,run_name=child_lv1
                          ,run_id='cd5a8aeaf4c54fce80fab3a3b2dd1053'
                          ,nested=True) as child_run1:
        with mlflow.start_run(experiment_id=experiment_id
                          ,run_name=child_lv2
                          ,nested=True) as child_run2:
            print(f'Run creada para {child_lv2} con run_id',child_run2.info.run_id)
            mlflow.end_run()


Run creada para cA2cD2 con run_id bcce09f079c348a3b13f91c92182a335
🏃 View run cA2cD2 at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/bcce09f079c348a3b13f91c92182a335
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655
🏃 View run LSTM at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/cd5a8aeaf4c54fce80fab3a3b2dd1053
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655
🏃 View run LSTM at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/42e6592c9dd141b99eec4768fea566be
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655


# Aplicando Transformada de Wavelet

In [8]:
n = df_clean.shape[0] #Cantidad de puntos a tratar
data = np.array(df_clean['closex1M'][:n]) #valores de la serie temporal
dates = df_clean['Close time_date'][:n] #valores de las fechas

#llamando la funcion de get_wt_coeff_inv para obtener 
## Coeficientes de wavelet y la senal resconstruida desde estos
coeffs_lv3, inv_coeffs_lv3 = get_wt_coeff_inv(signal=data
                                      ,wavelet='db1'
                                      ,level=3
                                      ,mode='symmetric'
                                      ,take=n)

coeffs_lv2, inv_coeffs_lv2 = get_wt_coeff_inv(signal=data
                                      ,wavelet='db1'
                                      ,level=2
                                      ,mode='symmetric'
                                      ,take=n)

coeffs_lv4, inv_coeffs_lv4 = get_wt_coeff_inv(signal=data
                                      ,wavelet='db1'
                                      ,level=4
                                      ,mode='symmetric'
                                      ,take=n)

coeffs_lv1, inv_coeffs_lv1 = get_wt_coeff_inv(signal=data
                                      ,wavelet='db1'
                                      ,level=1
                                      ,mode='symmetric'
                                      ,take=n)

## Visualizando la senal reconstruida

In [9]:
#incorporando la senal real en tiempo como externa para comparacion

wave_img_path = 'descomposicion_wavelet.html'

external_signals = {'real':data,
                    aproximacion:inv_coeffs_lv2[aproximacion]
                    }
plot_inv_wv(inv_coeffs=inv_coeffs_lv2
            ,date_signal=dates
            ,external_signals=external_signals
            ,output_path=wave_img_path)

In [10]:
mlflow.log_artifact(wave_img_path,run_id=child_run2.info.run_id,artifact_path='resultados')

# Entrenamiento

## Preparacion de la data

In [11]:
df_train = pd.DataFrame({aproximacion:inv_coeffs_lv2[aproximacion]
                         ,detalle:inv_coeffs_lv2[detalle]})
window_size = 24


X, Y = create_sequences(df_train,window_size=window_size,target_col=aproximacion)


#Espliteando la data
X_train, X_vt, y_train, y_vt = train_test_split(X, Y, test_size=0.3, shuffle=False)

x_val, x_test, y_val, y_test = train_test_split(X_vt,y_vt,test_size=0.1, shuffle=False)


# Reshape para que sea compatible con LSTM (samples, time steps, features)
#X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
#X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


## Creando modelo

In [14]:
model = Sequential([
    Input(shape=(window_size,2)),
    LSTM(layers[0][1],activation=fn_activation,return_sequences=True,recurrent_regularizer=l2(0.01)),
    LSTM(layers[1][1],activation=fn_activation,return_sequences=True,recurrent_regularizer=l2(0.1)),
    LSTM(layers[2][1],activation=fn_activation,return_sequences=False,recurrent_regularizer=l2(0.01)),
    #LSTM(layers[3][1],activation=fn_activation_by_layer[3],return_sequences=False),
    Dense(layers[3][1],activation=out_fn_activation)
])
model.compile(optimizer=optimizer,loss=loss_metric,metrics=model_metrics)

#Entrenando el modelo
ti = datetime.now()
history = model.fit(X_train,
                    y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_data=(x_val, y_val),
                    verbose=1
                    )
tf = datetime.now()


Epoch 1/20


InvalidArgumentError: Graph execution error:

Detected at node gradient_tape/compile_loss/mse/sub/BroadcastGradientArgs defined at (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/runpy.py", line 198, in _run_module_as_main

  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/runpy.py", line 88, in _run_code

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/var/folders/xq/wt557f817vv282xt9gj_2bzh0000gn/T/ipykernel_31076/4258369959.py", line 13, in <module>

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 113, in one_step_on_data

  File "/Users/victormoreno/Desktop/master/TFE/Develop/tfm_env/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 77, in train_step

Incompatible shapes: [16] vs. [16,25]
	 [[{{node gradient_tape/compile_loss/mse/sub/BroadcastGradientArgs}}]] [Op:__inference_multi_step_on_iterator_16422]

In [None]:
#graficando el MAE 
fig = go.Figure()

fig.add_trace(go.Scatter(x=history.epoch
                            ,y=history.history['mae']
                            ,mode='lines'
                            ,name='train'
                            ,line = dict(color='orange')))
fig.add_trace(go.Scatter(x=history.epoch
                            ,y=history.history['val_mae']
                            ,mode='lines'
                            ,name='val'
                            ,line = dict(color='blue')))
fig.update_layout(
    xaxis_title="Epochs",
    yaxis_title="MAE",
    title='Epochs vs MAE'
)
fig.write_html('epochsVSmae.html')
fig.show()


In [15]:
mlflow.log_artifact('epochsVSmae.html',run_id=child_run2.info.run_id,artifact_path='resultados')

In [None]:
#graficando el MAE 
fig = go.Figure()

fig.add_trace(go.Scatter(x=history.epoch
                            ,y=history.history['loss']
                            ,mode='lines'
                            ,name='train'
                            ,line = dict(color='magenta')))
fig.add_trace(go.Scatter(x=history.epoch
                            ,y=history.history['val_loss']
                            ,mode='lines'
                            ,name='val'
                            ,line = dict(color='red')))


fig.update_layout(
    xaxis_title="Epochs",
    yaxis_title="Loss",
    title='Epochs vs Loss'
)
fig.write_html('epochsVSloss.html')
fig.show()


In [17]:
mlflow.log_artifact('epochsVSloss.html',run_id=child_run2.info.run_id,artifact_path='resultados')

In [18]:
# Guardando los parametros en la ejecucion 
parameters = {'data_standarizada':False
              ,'CoeficienteAproximacion':aproximacion
              ,'CoeficienteDetalle':detalle
              ,'puntos_entrenamiento':X_train.shape[0]
              ,'puntos_validacion':x_val.shape[0]
              ,'puntos_testeo':x_test.shape[0]
              ,'fn_activacion_por_capa':fn_activation
              ,'fn_activacion_salida':out_fn_activation
              ,'capas':layers
              ,'window_size':window_size
              ,'loss_metrics':loss_metric
              ,'model_metrics':model_metrics
              ,'epochs':epochs
              ,'batch_size':batch_size
              ,'optimizer':optimizer
              ,'tiempo_entrenamiento':(tf-ti).seconds}

mlflow.log_params(params=parameters
                  ,run_id=child_run2.info.run_id)

In [None]:
#Obteniendo las prediccciones del modelo
train_pred = model.predict(X_train).reshape(-1)
val_pred = model.predict(x_val).reshape(-1)
test_pred = model.predict(x_test).reshape(-1)




In [None]:
#Saving model
signature = mlflow.models.infer_signature(X_train,test_pred)
#log the model
with mlflow.start_run(run_id=child_run2.info.run_id) as run_model:
    mlflow.keras.log_model(model,'model',signature=signature)

In [None]:
train_test = {'train':train_pred,'test':test_pred}

fig = go.Figure()

offset = window_size
date_train = dates[offset:len(train_pred)+offset]
date_val = dates[len(train_pred)+offset:len(train_pred)+len(val_pred)+offset]
date_test = dates[len(train_pred)+len(val_pred)+offset:]

fig.add_trace(go.Scatter(x=dates
                            ,y=data
                            ,mode='lines'
                            ,name='real'
                            ,line = dict(color='green')))

fig.add_trace(go.Scatter(x=date_train
                            ,y=train_pred
                            ,mode='lines'
                            ,name='train'
                            ,line=dict(color='blue')))

fig.add_trace(go.Scatter(x=date_val
                            ,y=val_pred
                            ,mode='lines'
                            ,name='val'
                            ,line=dict(color='magenta')))

fig.add_trace(go.Scatter(x=date_test
                            ,y=test_pred
                            ,mode='lines'
                            ,name='test'
                            ,line= dict(color='red')))


fig.write_html(model_result_path)
    
fig.show()



In [22]:
#Guardando la imagen del resultado del modelo
mlflow.log_artifact(model_result_path,run_id=child_run2.info.run_id,artifact_path='resultados')

# Guardando metricas

In [None]:

mae_test = mean_absolute_error(y_true=y_test, y_pred=test_pred)
mae_train = mean_absolute_error(y_true=y_train, y_pred=train_pred)
mae_val = mean_absolute_error(y_true=y_val, y_pred=val_pred)

print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

metrics = {'mae_train':mae_train,'mae_val':mae_val,'mae_test':mae_test}

In [24]:
mlflow.log_metrics(metrics=metrics,run_id=child_run2.info.run_id)

In [24]:
#mlflow.set_tags

In [25]:
#import mlflow
#logged_model = 'runs:/de05435901b8433f80dcb7331e609971/model'

# Load model as a PyFuncModel.
#loaded_model = mlflow.pyfunc.load_model(logged_model)
#m = loaded_model.get_raw_model()
#m.to_json()



In [26]:
#with open('model_json','w+') as f:
#    f.write(m.to_json())