# Importing Libraries

In [12]:
import pandas as pd
from utils import common_functions 
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error
import mlflow
from mlflow.exceptions import RestException
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.backend import clear_session
import pywt


# Iniciando funciones

In [2]:
# Instantiate the project's helper collection once and keep a notebook-level
# alias to its `create_sequences` routine, which the data-preparation cell calls.
_helpers = common_functions()
create_sequences = _helpers.create_sequences

# Leyendo la data y limpiándola

In [3]:
# Location of the input CSV, relative to the working directory.
# Plain string literal: the original f-string had no placeholders to format.
path_data = '../sandbox/pepe.csv'
# header=0: the first row of the file supplies the column names.
df = pd.read_csv(path_data, header=0)

In [4]:
# Data cleaning

# Parse the millisecond-epoch "Open time" / "Close time" columns and store them
# as ISO 8601 strings in new "<column>_date" columns.
for raw_col in ('Open time', 'Close time'):
    stamps = pd.to_datetime(df[raw_col], unit='ms')
    df[f'{raw_col}_date'] = stamps.map(lambda ts: ts.isoformat())

# Scale the "Close" field by one million.
df['closex1M'] = df['Close'].mul(1_000_000)

# Original note: normalize the data / remove that transformation.

# Min-max scale the x1M close into [0, 1] (feature_range=(-1, 1) is the alternative).
scaler = MinMaxScaler(feature_range=(0, 1))
close_column = df[['closex1M']].to_numpy()  # 2-D (n_rows, 1), the layout the scaler is fed
df['Close_Norm'] = scaler.fit_transform(close_column)


In [13]:
# Continuous wavelet transform of the x1M close series with the "mexh" wavelet
# at integer scales 1..127.
wavelet_scales = np.arange(1, 128)
coefficients, frequencies = pywt.cwt(df.closex1M, scales=wavelet_scales, wavelet="mexh")
# Show both result shapes, one per line.
print(coefficients.shape, frequencies.shape, sep='\n')

(127, 15340)
(127,)


In [18]:
# Put the CWT coefficients in a frame with one row per time step and one column
# per scale, then attach the x1M close price as an extra column.
coeffs_df = pd.DataFrame(coefficients).transpose()
# Read the price from `df` rather than `df_clean`: `df_clean` is only created
# further down the notebook, so referencing it here raises NameError on a fresh
# top-to-bottom run. `df_clean['closex1M']` is this very column selected from `df`.
coeffs_df['closex1M'] = df['closex1M']
coeffs_df.shape

(15340, 128)

In [5]:
# Keep only the columns needed downstream: the close timestamp and the x1M close.
select = ['Close time_date', 'closex1M']
df_clean = df.loc[:, select]

In [6]:
# Preview the first five rows of the selected columns.
df_clean.head(n=5)

Unnamed: 0,Close time_date,closex1M
0,2023-05-05T18:59:59.999000,3.07
1,2023-05-05T19:59:59.999000,3.35
2,2023-05-05T20:59:59.999000,3.52
3,2023-05-05T21:59:59.999000,3.2
4,2023-05-05T22:59:59.999000,3.33


# Conexión al servidor de MLflow
También se crean las ejecuciones necesarias.


In [7]:
# Point the MLflow client at the remote tracking server.
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

# Create the experiment, or look it up when it already exists.
# The name can be anything as long as it does not clash with another experiment.
# It is assigned before the `try` so the `except` branch can always rely on it.
experiment_name = 'SERIET_PLUS_MODELADO'
try:
    experiment_id = mlflow.create_experiment(
        name=experiment_name,
        tags={'created_by': 'Juan Carlos Cabrera'},  # important: record who creates it
    )
except RestException as r:
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        # The REST error was something other than "already exists": re-raise it
        # instead of failing below with an unrelated AttributeError on None.
        raise
    print('Full name', experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'SERIET_PLUS_MODELADO' already exists.
Full name SERIET_PLUS_MODELADO


## Creando la ejecución

In [8]:
# Run names for the child and grandchild levels nested under the parent run.
nombre_hijo = 'LSTM'
nombre_nieto = 'ST_SIN_PREPROCESADO'

# Open parent -> child -> grandchild runs in that order.
# The parent's run_name is only supplied the first time; the original cell kept
# a commented-out alternative: run_id='e0b18f5efa104f5184b5df9a61728da1'.
with mlflow.start_run(experiment_id=experiment_id, run_name='PEPE') as run_parent, \
        mlflow.start_run(experiment_id=experiment_id, run_name=nombre_hijo, nested=True) as run_child:
    with mlflow.start_run(experiment_id=experiment_id, run_name=nombre_nieto, nested=True) as run_grandchild:
        print(f'Run creada para {nombre_nieto} con run_id', run_grandchild.info.run_id)
    # Reported after the grandchild context has exited, while the child is still open.
    print(f'Run creada para {nombre_hijo} con run_id', run_child.info.run_id)
        

Run creada para ST_SIN_PREPROCESADO con run_id dc3777470adb4895ad226e24b91b5eb3
🏃 View run ST_SIN_PREPROCESADO at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/dc3777470adb4895ad226e24b91b5eb3
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241
Run creada para LSTM con run_id 744c00f790834d6aac332e6453aefac3
🏃 View run LSTM at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/744c00f790834d6aac332e6453aefac3
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241
🏃 View run PEPE at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/cbaff2af09af486a8d7d91fb9e902b05
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241


In [10]:
# Ejemplo
#mlflow.log_artifact(wave_img_path,run_id=run_child.info.run_id,artifact_path='resultados')

# Entrenamiento

## Preparación de la data

In [7]:
# Univariate training frame: only the x1M close price.
df_train = df_clean[['closex1M']].copy()

# Number of past time steps in each input window.
window_size = 24
X, Y = create_sequences(df_train, window_size=window_size, target_col='closex1M')

# Shape the windows as (samples, time steps, features) explicitly, matching the
# 3-D input the LSTM model is declared with, instead of passing 2-D windows.
# NOTE(review): assumes create_sequences yields one feature per time step (the
# original 2-D reshape already required this) — confirm against utils.
X = X.reshape(X.shape[0], X.shape[1], 1)

# One target value per window, as a column vector.
Y = Y.reshape(-1, 1)

# Chronological split (shuffle=False): the first 70% is the training set ...
X_train, X_vt, y_train, y_vt = train_test_split(X, Y, test_size=0.3, shuffle=False)

# ... and the remaining 30% is split 90/10 into validation and test.
x_val, x_test, y_val, y_test = train_test_split(X_vt, y_vt, test_size=0.1, shuffle=False)


## Creando modelo

In [8]:
# Network topology as ordered (layer type, units) pairs.
capas = [
    ('LSTM', 10),
    ('LSTM', 20),
    ('DENSE', 1),
]
# Per-layer activation functions are not configured here
# (earlier idea: ['relu', 'relu', 'relu', 'relu']).

# Training settings.
optimizer = 'adam'
loss_metric = 'mse'
model_metrics = ['mae']
epochs, batch_size = 10, 16

# HTML file the results figure is written to.
model_result_path = 'model_results.html'

In [11]:
# Output file for the interactive results figure.
model_result_path = "model_results.html"

In [12]:
# Network topology as ordered (layer type, units) pairs:
# three stacked LSTM layers followed by a single-unit dense output.
capas = [
    ('LSTM', 100),
    ('LSTM', 50),
    ('LSTM', 25),
    ('DENSE', 1),
]

# Training settings.
optimizer = 'adam'
loss_metric = 'mse'
model_metrics = ['mae']
epochs, batch_size = 10, 16

In [13]:
# Clear Keras' global state before building a new model.
clear_session()

# Features per time step, taken from the frame the windows are built from
# (the original derived the same value as df_clean.shape[1] - 1).
n_features = df_train.shape[1]

# Position of the last LSTM entry in `capas`. Every LSTM before it must return
# the full sequence so the next LSTM receives 3-D input; the last one returns
# only its final output. Unlike checking `i == len(capas) - 2`, this is also
# right for a single-LSTM topology or more than one trailing Dense layer.
lstm_positions = [idx for idx, (layer_type, _) in enumerate(capas) if layer_type == 'LSTM']
last_lstm = lstm_positions[-1] if lstm_positions else None

# Sequential model built dynamically from `capas`.
model = Sequential()
# Declare the input with an explicit Input layer rather than passing
# input_shape to the first LSTM, which Keras warns against for Sequential models.
model.add(Input(shape=(window_size, n_features)))

for i, (tipo, unidades) in enumerate(capas):
    if tipo == 'LSTM':
        model.add(LSTM(units=unidades, activation='tanh', return_sequences=(i != last_lstm)))
    elif tipo == 'DENSE':
        model.add(Dense(units=unidades, activation='linear'))
    else:
        # Previously an unknown type was skipped silently, yielding a different
        # network than the one configured.
        raise ValueError(f"Unsupported layer type in capas: {tipo!r}")

# Compile the model.
model.compile(optimizer=optimizer, loss=loss_metric, metrics=model_metrics)

# Show the model summary.
model.summary()




  super().__init__(**kwargs)


In [14]:
# Train the model on the training split, passing the validation split to Keras.
fit_kwargs = {
    'epochs': epochs,
    'batch_size': batch_size,
    'validation_data': (x_val, y_val),
    'verbose': 1,
}
history = model.fit(X_train, y_train, **fit_kwargs)

Epoch 1/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 31ms/step - loss: 5.5557 - mae: 1.0373 - val_loss: 22.1944 - val_mae: 3.2549
Epoch 2/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - loss: 0.1418 - mae: 0.1676 - val_loss: 13.8394 - val_mae: 2.2084
Epoch 3/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - loss: 0.0507 - mae: 0.1164 - val_loss: 11.0735 - val_mae: 1.9773
Epoch 4/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - loss: 0.0584 - mae: 0.1239 - val_loss: 9.5105 - val_mae: 1.7278
Epoch 5/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - loss: 0.0224 - mae: 0.0813 - val_loss: 8.4651 - val_mae: 1.6054
Epoch 6/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - loss: 0.0219 - mae: 0.0853 - val_loss: 7.8081 - val_mae: 1.5476
Epoch 7/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s

In [15]:
# Collect the run parameters: topology, split sizes and training settings.
# Per-layer activation functions are deliberately not logged because the
# defaults are used.
parameters = dict(
    topology=capas,
    data_standarizada=False,
    puntos_entrenamiento=X_train.shape[0],
    puntos_validacion=x_val.shape[0],
    puntos_testeo=x_test.shape[0],
    window_size=window_size,
    loss_metrics=loss_metric,
    model_metrics=model_metrics,
    epochs=epochs,
    batch_size=batch_size,
    optimizer=optimizer,
)

# NOTE(review): parameters go to run_grandchild, whereas the artifact and metric
# logging cells of this notebook target run_child — confirm which run is intended.
mlflow.log_params(parameters, run_id=run_grandchild.info.run_id)

In [16]:
# Model predictions for each split, computed in train -> validation -> test order.
train_pred, val_pred, test_pred = [
    model.predict(split) for split in (X_train, x_val, x_test)
]

[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


In [None]:
#df_clean.Close_Norm = scaler.inverse_transform(df_clean.Close_Norm.to_numpy().reshape(-1,1))

In [33]:
#train_pred = scaler.inverse_transform(train_pred.reshape(-1,1)).reshape(-1)
#val_pred = scaler.inverse_transform(val_pred.reshape(-1,1)).reshape(-1)
#test_pred = scaler.inverse_transform(test_pred.reshape(-1,1)).reshape(-1)

In [17]:
# Predictions keyed by split name.
train_test = {'train': train_pred, 'test': test_pred}

fig = go.Figure()

dates = df_clean['Close time_date']

n_train, n_val, n_test = len(train_pred), len(val_pred), len(test_pred)

# Number of leading rows without a prediction. It is derived from the lengths
# so that every date slice has exactly as many points as its prediction array.
# The original hard-coded 25 and ended the train slice at len(train_pred), which
# left the train dates 25 points shorter than train_pred.
# NOTE(review): assumes the last prediction corresponds to the last row of the
# series — confirm against create_sequences.
offset = len(dates) - (n_train + n_val + n_test)

date_train = dates[offset:offset + n_train]
date_val = dates[offset + n_train:offset + n_train + n_val]
date_test = dates[offset + n_train + n_val:]

# Real series first, then one line per split drawn on its own date range.
series_to_plot = [
    ('real', dates, df_clean.closex1M, 'green'),
    ('train', date_train, train_pred.reshape(-1), 'blue'),
    ('val', date_val, val_pred.reshape(-1), 'magenta'),
    ('test', date_test, test_pred.reshape(-1), 'red'),
]
for trace_name, x_values, y_values, color in series_to_plot:
    fig.add_trace(go.Scatter(x=x_values,
                             y=y_values,
                             mode='lines',
                             name=trace_name,
                             line=dict(color=color)))

# Persist the interactive figure so it can be logged as an artifact, then display it.
fig.write_html(model_result_path)

fig.show()



In [140]:
# Upload the saved results figure under the 'resultados' artifact folder.
# NOTE(review): this targets run_child, while the parameters are logged to
# run_grandchild in the parameter-logging cell — confirm which run is intended.
mlflow.log_artifact(
    local_path=model_result_path,
    artifact_path='resultados',
    run_id=run_child.info.run_id,
)

# Guardando métricas

In [141]:

# Mean absolute error between the targets and the predictions of each split.
mae_train = mean_absolute_error(y_train, train_pred)
mae_val = mean_absolute_error(y_val, val_pred)
mae_test = mean_absolute_error(y_test, test_pred)

print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

# Payload for MLflow metric logging.
metrics = dict(mae_train=mae_train, mae_val=mae_val, mae_test=mae_test)

MAE train: 0.07801761497591495,   MAE val: 0.22495412451987398,  MAE test: 0.34330152859895136


In [142]:
# Send the MAE values to the tracking server.
# NOTE(review): this targets run_child, while the parameters are logged to
# run_grandchild in the parameter-logging cell — confirm which run is intended.
mlflow.log_metrics(metrics, run_id=run_child.info.run_id)

In [21]:
#mlflow.set_tags