# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error
import mlflow
from mlflow.exceptions import RestException
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.backend import clear_session
import pywt


# Leyendo data y limpiandola

In [2]:
path_data = f'../sandbox/pepe.csv'
df = pd.read_csv(path_data,header=0)

In [3]:
#Limpieando la data 

# Convertir "Open time" y "Close time" a datetime y darle formato ISO 8601
df['Open time_date'] = pd.to_datetime(df['Open time'], unit='ms').apply(lambda x: x.isoformat())
df['Close time_date'] = pd.to_datetime(df['Close time'], unit='ms').apply(lambda x: x.isoformat())

#Escalando el valor de el campo Close
df['closex1M'] = df['Close'] * 1000000

# NORMALIZAR LA DATA QUITAR ESA TRASFORMACION

scaler = MinMaxScaler(feature_range=(0, 1))  # O feature_range=(-1, 1)
df['Close_Norm'] = scaler.fit_transform(df['closex1M'].to_numpy().reshape(-1, 1))


In [4]:
scales = (1,256)

In [5]:
coefficients, frequencies = pywt.cwt(df.closex1M, scales=np.arange(1, 256), wavelet="mexh")
print(coefficients.shape)
print(frequencies.shape)

(255, 15340)
(255,)


In [6]:
coeffs_df = pd.DataFrame(coefficients).transpose()
coeffs_df.columns = coeffs_df.columns.map(str)
coeffs_df['closex1M'] = df['closex1M']
scaler_w = MinMaxScaler(feature_range=(-1, 1)) 
# Normalizar todas las columnas
X_normalized = scaler_w.fit_transform(coeffs_df)

# Convertir el resultado nuevamente en un DataFrame
coeffs_df = pd.DataFrame(X_normalized, columns=coeffs_df.columns)
coeffs_df.shape

(15340, 256)

In [7]:
coeffs_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,closex1M
0,-0.461662,-0.370946,-0.278659,-0.19902,-0.171362,-0.166711,-0.158524,-0.14649,-0.12745,-0.108589,...,0.035841,0.036123,0.032728,0.033332,0.032059,0.029367,0.02823,0.027944,0.02737,-0.816828
1,-0.007581,-0.046946,-0.052455,-0.030703,-0.037447,-0.057355,-0.069812,-0.069668,-0.060175,-0.051231,...,0.035863,0.035936,0.032731,0.033536,0.031938,0.028469,0.027216,0.027805,0.025976,-0.795979
2,-0.162089,0.086289,0.123879,0.114102,0.082187,0.036592,0.012961,0.002694,0.002754,0.011034,...,0.039276,0.039516,0.035966,0.035899,0.035407,0.033047,0.032095,0.031324,0.029573,-0.783321
3,-0.393893,0.017476,0.201648,0.212622,0.168647,0.119912,0.083594,0.067249,0.060455,0.062177,...,0.038218,0.038554,0.035386,0.036996,0.03658,0.0326,0.031563,0.031347,0.02939,-0.807148
4,-0.486516,-0.107787,0.191252,0.253948,0.225596,0.17814,0.144284,0.12123,0.110737,0.115595,...,0.04055,0.040679,0.037151,0.037374,0.036565,0.033763,0.032538,0.032406,0.03184,-0.797468


# Conexion a Mlflow server
tambien se crean las ejecuciones necesarias


In [8]:
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

#Creacion o identificacion del experimento 
try:
    experiment_name = 'WV_PLUS_MODEL' #Puede ser cualquiera siempre y cuando no se troque con otro
    experiment_id = (mlflow
                        .create_experiment(name=experiment_name
                                            ,tags={'created_by':'Victor Moreno'})) #importante poner el nombre de quien lo crea
except RestException as r:
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    print('Full name',experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'WV_PLUS_MODEL' already exists.
Full name WV_PLUS_MODEL


## Creando la ejecucion

In [9]:
nombre_hijo = 'LSTM'
nombre_nieto = 'WVC_1-512'

with mlflow.start_run(experiment_id=experiment_id
                      #,run_name='PEPE'# El nombre se coloca solo la primera vez
                      ,run_id='963dc6ab70094271baf0f267a4e80566'
                      ) as run_parent:
    with mlflow.start_run(experiment_id=experiment_id
                          #,run_name= nombre_hijo # Acá se ponne el nombre de la ejecución
                          , run_id = "713eabcc56814f22b75907f71cf6ef28"
                          ,nested=True) as run_child:
        with mlflow.start_run(experiment_id=experiment_id
        , run_name= nombre_nieto
        , nested= True) as run_grandchild:
            print(f'Run creada para {nombre_nieto} con run_id',run_grandchild.info.run_id)
        print(f'Run creada para {nombre_hijo} con run_id',run_child.info.run_id)
        

Run creada para WVC_1-512 con run_id ee2a6339d64d4c589f7d86f29255345e
🏃 View run WVC_1-512 at: http://34.58.215.162:8080/#/experiments/526235632293394123/runs/ee2a6339d64d4c589f7d86f29255345e
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/526235632293394123
Run creada para LSTM con run_id 713eabcc56814f22b75907f71cf6ef28
🏃 View run LSTM at: http://34.58.215.162:8080/#/experiments/526235632293394123/runs/713eabcc56814f22b75907f71cf6ef28
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/526235632293394123
🏃 View run PEPE at: http://34.58.215.162:8080/#/experiments/526235632293394123/runs/963dc6ab70094271baf0f267a4e80566
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/526235632293394123


In [10]:
# Ejemplo
#mlflow.log_artifact(wave_img_path,run_id=run_child.info.run_id,artifact_path='resultados')

# Entrenamiento

## Preparacion de la data

In [10]:
coeffs_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,closex1M
0,-0.461662,-0.370946,-0.278659,-0.19902,-0.171362,-0.166711,-0.158524,-0.14649,-0.12745,-0.108589,...,0.035841,0.036123,0.032728,0.033332,0.032059,0.029367,0.02823,0.027944,0.02737,-0.816828
1,-0.007581,-0.046946,-0.052455,-0.030703,-0.037447,-0.057355,-0.069812,-0.069668,-0.060175,-0.051231,...,0.035863,0.035936,0.032731,0.033536,0.031938,0.028469,0.027216,0.027805,0.025976,-0.795979
2,-0.162089,0.086289,0.123879,0.114102,0.082187,0.036592,0.012961,0.002694,0.002754,0.011034,...,0.039276,0.039516,0.035966,0.035899,0.035407,0.033047,0.032095,0.031324,0.029573,-0.783321
3,-0.393893,0.017476,0.201648,0.212622,0.168647,0.119912,0.083594,0.067249,0.060455,0.062177,...,0.038218,0.038554,0.035386,0.036996,0.03658,0.0326,0.031563,0.031347,0.02939,-0.807148
4,-0.486516,-0.107787,0.191252,0.253948,0.225596,0.17814,0.144284,0.12123,0.110737,0.115595,...,0.04055,0.040679,0.037151,0.037374,0.036565,0.033763,0.032538,0.032406,0.03184,-0.797468


In [12]:
df_train = coeffs_df.copy()
# Crear secuencias para la LSTM
timesteps = 24
X, y = [], []
for i in range(len(df_train) - timesteps):
    X.append(df_train[i:i+timesteps])
    y.append(df.Close_Norm[i+timesteps])  # Valor futuro
X, y = np.array(X), np.array(y)

In [13]:
df_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,closex1M
0,-0.461662,-0.370946,-0.278659,-0.19902,-0.171362,-0.166711,-0.158524,-0.14649,-0.12745,-0.108589,...,0.035841,0.036123,0.032728,0.033332,0.032059,0.029367,0.02823,0.027944,0.02737,-0.816828
1,-0.007581,-0.046946,-0.052455,-0.030703,-0.037447,-0.057355,-0.069812,-0.069668,-0.060175,-0.051231,...,0.035863,0.035936,0.032731,0.033536,0.031938,0.028469,0.027216,0.027805,0.025976,-0.795979
2,-0.162089,0.086289,0.123879,0.114102,0.082187,0.036592,0.012961,0.002694,0.002754,0.011034,...,0.039276,0.039516,0.035966,0.035899,0.035407,0.033047,0.032095,0.031324,0.029573,-0.783321
3,-0.393893,0.017476,0.201648,0.212622,0.168647,0.119912,0.083594,0.067249,0.060455,0.062177,...,0.038218,0.038554,0.035386,0.036996,0.03658,0.0326,0.031563,0.031347,0.02939,-0.807148
4,-0.486516,-0.107787,0.191252,0.253948,0.225596,0.17814,0.144284,0.12123,0.110737,0.115595,...,0.04055,0.040679,0.037151,0.037374,0.036565,0.033763,0.032538,0.032406,0.03184,-0.797468


In [14]:


window_size = 24
#X, Y = create_sequences(df_train,window_size=window_size,target_col='closex1M')

#X = X.reshape(X.shape[0],X.shape[1])

#Y = Y.reshape(-1,1)

#Espliteando la data
X_train, X_vt, y_train, y_vt = train_test_split(X, y, test_size=0.3, shuffle=False)

x_val, x_test, y_val, y_test = train_test_split(X_vt,y_vt,test_size=0.1, shuffle=False)


# Reshape para que sea compatible con LSTM (samples, time steps, features)
#X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
#X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


## Creando modelo

In [18]:
capas =  [('LSTM',50),('LSTM',100),('LSTM',150),('DENSE',1)]
#fn_activation_by_layer = #['relu','relu','relu','relu']
epochs = 45
batch_size = 128
loss_metric = 'mse'
model_metrics = ['mae']
model_result_path = 'model_results.html'
optimizer = 'adam'

In [270]:
scales[1]

256

In [48]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
clear_session()
# Definición del modelo
model = Sequential()

# 🔹 Capa convolucional 1D
model.add(Conv1D(filters=128, kernel_size=3, activation='tanh', input_shape=(window_size, df_train.shape[1])))
model.add(MaxPooling1D(pool_size=2))

# 🔹 Segunda capa convolucional (opcional)
model.add(Conv1D(filters=64, kernel_size=3, activation='tanh'))
model.add(MaxPooling1D(pool_size=2))

# 🔹 Capa LSTM para capturar patrones a largo plazo
model.add(LSTM(units=25, return_sequences=True))
model.add(LSTM(units=100, return_sequences=True))

model.add(LSTM(units=150, return_sequences=False))

# 🔹 Capas densas para la predicción final
model.add(Dense(200, activation='relu'))

model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='linear'))  # Salida para regresión

# Compilación del modelo
model.compile(optimizer = Adam(learning_rate=0.000001), loss='mse', metrics=['mae'])

# Resumen del modelo
model.summary()


In [258]:
# Limpiamos el grafo computacional
clear_session()

# Crear el modelo secuencial
model = Sequential()

# Construcción dinámica de la red neuronal
for i, (tipo, unidades) in enumerate(capas):
    if tipo == 'LSTM':
        # Primera capa LSTM necesita input_shape
        if i == 0:
            model.add(LSTM(units=unidades,activation='tanh', return_sequences=True, input_shape=(window_size, df_train.shape[1])))
        # Última capa LSTM antes de Dense no necesita return_sequences
        elif i == len(capas) - 2:
            model.add(LSTM(units=unidades, activation='tanh',return_sequences=False))
        else:
            model.add(LSTM(units=unidades,activation='tanh', return_sequences=True))

    elif tipo == 'DENSE':  # Capa densa final
        model.add(Dense(units=unidades, activation='linear'))

# Compilar el modelo
model.compile(optimizer = Adam(learning_rate=0.0001) , loss=loss_metric, metrics=model_metrics)

# Mostrar el resumen del modelo
model.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [259]:
coeffs_df.shape

(15340, 512)

In [260]:
X_train.shape

(10721, 24, 512)

In [49]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# 🔹 EarlyStopping: Detiene el entrenamiento cuando la métrica no mejora
early_stopping = EarlyStopping(
    monitor='val_loss',  # Métrica a monitorear
    patience=10,         # Número de épocas sin mejora antes de detener el entrenamiento
    restore_best_weights=True  # Restaura los pesos de la mejor época
)

# 🔹 ReduceLROnPlateau: Reduce la tasa de aprendizaje cuando la métrica se estanca
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',  # Métrica a monitorear
    factor=0.5,          # Factor por el cual se reducirá la tasa de aprendizaje
    patience=5,          # Número de épocas sin mejora antes de reducir el learning rate
    min_lr=1e-14         # Tasa de aprendizaje mínima permitida
)

In [50]:
#Entrenando el modelo
history = model.fit(X_train,
                    y_train,
                    epochs=2000,
                    batch_size=batch_size,
                    callbacks=[early_stopping,reduce_lr],
                    validation_data=(x_val, y_val),
                    verbose=1
                    )

Epoch 1/2000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 35ms/step - loss: 0.0431 - mae: 0.1321 - val_loss: 0.2643 - val_mae: 0.4674 - learning_rate: 1.0000e-06
Epoch 2/2000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 0.0423 - mae: 0.1282 - val_loss: 0.2593 - val_mae: 0.4621 - learning_rate: 1.0000e-06
Epoch 3/2000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - loss: 0.0408 - mae: 0.1233 - val_loss: 0.2545 - val_mae: 0.4567 - learning_rate: 1.0000e-06
Epoch 4/2000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 33ms/step - loss: 0.0389 - mae: 0.1164 - val_loss: 0.2497 - val_mae: 0.4515 - learning_rate: 1.0000e-06
Epoch 5/2000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - loss: 0.0379 - mae: 0.1143 - val_loss: 0.2452 - val_mae: 0.4466 - learning_rate: 1.0000e-06
Epoch 6/2000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - loss: 

In [249]:
# Guardando los parametros en la ejecucion 
parameters = {'topology': capas
              ,'data_normalized':True
              ,'puntos_entrenamiento':X_train.shape[0]
              ,'puntos_validacion':x_val.shape[0]
              ,'puntos_testeo':x_test.shape[0]
              #,'fn_activacion_por_capa':fn_activation_by_layer   # OJO, vamos a usar las funciones de activación por defecto
              ,'window_size':window_size
              ,'loss_metrics':loss_metric
              ,'model_metrics':model_metrics
              ,'epochs':epochs
              ,'batch_size':batch_size
              ,'optimizer':optimizer
              , 'scales' : scales}

mlflow.log_params(params=parameters
                  ,run_id=run_grandchild.info.run_id)

In [51]:
#Obteniendo las prediccciones del modelo
train_pred = model.predict(X_train)
val_pred = model.predict(x_val)
test_pred = model.predict(x_test)

[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [376]:
#df_clean.Close_Norm = scaler.inverse_transform(df_clean.Close_Norm.to_numpy().reshape(-1,1))

In [52]:
train_pred = scaler.inverse_transform(train_pred.reshape(-1,1)).reshape(-1)
val_pred = scaler.inverse_transform(val_pred.reshape(-1,1)).reshape(-1)
test_pred = scaler.inverse_transform(test_pred.reshape(-1,1)).reshape(-1)

In [53]:
train_test = {'train':train_pred,'test':test_pred}

fig = go.Figure()

dates = df['Close time_date']

date_train = dates[25:len(train_pred)]
date_val = dates[len(train_pred)+25:len(train_pred)+len(val_pred)+25]
date_test = dates[len(train_pred)+len(val_pred)+25:]


fig.add_trace(go.Scatter(x=dates
                            ,y=df.closex1M
                            ,mode='lines'
                            ,name='real'
                            ,line = dict(color='green')))

fig.add_trace(go.Scatter(x=date_train
                            ,y=train_pred.reshape(-1)
                            ,mode='lines'
                            ,name='train'   
                            ,line=dict(color='blue')))

fig.add_trace(go.Scatter(x=date_val
                            ,y=val_pred.reshape(-1)
                            ,mode='lines'
                            ,name='val'
                            ,line=dict(color='magenta')))

fig.add_trace(go.Scatter(x=date_test
                            ,y=test_pred.reshape(-1)
                            ,mode='lines'
                            ,name='test'
                            ,line= dict(color='red')))


fig.write_html(model_result_path)
    
fig.show()



In [385]:
#Guardando la imagen del resultado del modelo
mlflow.log_artifact(model_result_path,run_id=run_grandchild.info.run_id,artifact_path='resultados')

KeyboardInterrupt: 

# Guardando metricas

In [54]:
mae_test = mean_absolute_error(y_true= scaler.inverse_transform(y_test.reshape(1, -1)), y_pred=test_pred.reshape(1, -1))
mae_train = mean_absolute_error(y_true= scaler.inverse_transform(y_train.reshape(1, -1)), y_pred=train_pred.reshape(1, -1))
mae_val = mean_absolute_error(y_true= scaler.inverse_transform(y_val.reshape(1, -1)), y_pred=val_pred.reshape(1, -1))


print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

metrics = {'mae_train':mae_train,'mae_val':mae_val,'mae_test':mae_test}

MAE train: 0.1559902156972694,   MAE val: 1.8512199934725317,  MAE test: 1.9569874201650206


In [224]:

mae_test = mean_absolute_error(y_true=y_test, y_pred=test_pred)
mae_train = mean_absolute_error(y_true=y_train, y_pred=train_pred)
mae_val = mean_absolute_error(y_true=y_val, y_pred=val_pred)


print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

metrics = {'mae_train':mae_train,'mae_val':mae_val,'mae_test':mae_test}

MAE train: 3.9427681579402885,   MAE val: 11.20865869436152,  MAE test: 13.741178217975321


In [255]:
mlflow.log_metrics(metrics=metrics,run_id=run_grandchild.info.run_id)

In [21]:
#mlflow.set_tags