# Importing Libraries

In [17]:
import pandas as pd
from utils import common_functions 
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error
import mlflow
from mlflow.exceptions import RestException
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.backend import clear_session
from prophet import Prophet


# Iniciando funciones

In [18]:
create_sequences = common_functions().create_sequences

# Leyendo data y limpiandola

In [19]:
path_data = f'../SERIET_PLUS_MODELADO/datos_DOGEUSDT.csv'
df = pd.read_csv(path_data,header=0)

In [288]:
#Limpieando la data 

# Convertir "Open time" y "Close time" a datetime y darle formato ISO 8601
df['Open time_date'] = pd.to_datetime(df['Open time'], unit='ms').apply(lambda x: x.isoformat())
df['Close time_date'] = pd.to_datetime(df['Close time'], unit='ms').apply(lambda x: x.isoformat())

#Escalando el valor de el campo Close
df['closex1M'] = df['Close'] * 100

# NORMALIZAR LA DATA QUITAR ESA TRASFORMACION

#scaler = MinMaxScaler(feature_range=(0, 1))  # O feature_range=(-1, 1)
#df['closex1M'] = scaler.fit_transform(df['closex1M'].to_numpy().reshape(-1, 1))


In [289]:
df['closex1M'] = np.log1p(df['closex1M'])
#np.log1p(df['closex1M'])

In [290]:
#seleccionando los campos que necesitamos
select = ['Close time_date','closex1M']
df_clean = df[select]

In [291]:
df_clean.shape

(15790, 2)

In [292]:
df_clean.tail()

Unnamed: 0,Close time_date,closex1M
15785,2025-02-02T22:59:59.999000,3.314368
15786,2025-02-02T23:59:59.999000,3.325324
15787,2025-02-03T00:59:59.999000,3.275785
15788,2025-02-03T01:59:59.999000,3.17484
15789,2025-02-03T02:59:59.999000,3.169644


In [293]:
df_train = df_clean[['closex1M']].copy()

window_size = 24
X, Y = create_sequences(df_train,window_size=window_size,target_col='closex1M')

X = X.reshape(X.shape[0],X.shape[1])

Y = Y.reshape(-1,1)

#Espliteando la data
X_train, X_vt, y_train, y_vt = train_test_split(X, Y, test_size=0.3, shuffle=False)

x_val, x_test, y_val, y_test = train_test_split(X_vt,y_vt,test_size=0.1, shuffle=False)


# Conexion a Mlflow server
tambien se crean las ejecuciones necesarias


In [294]:
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

#Creacion o identificacion del experimento 
try:
    experiment_name = 'SERIET_PLUS_MODELADO' #Puede ser cualquiera siempre y cuando no se troque con otro
    experiment_id = (mlflow
                        .create_experiment(name=experiment_name
                                            ,tags={'created_by':'Juan Carlos Cabrera'})) #importante poner el nombre de quien lo crea
except RestException as r:
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    print('Full name',experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'SERIET_PLUS_MODELADO' already exists.
Full name SERIET_PLUS_MODELADO


## Creando la ejecucion

In [295]:
nombre_hijo = 'LSTM'
nombre_nieto = 'ST_T_LOGATIRMICA'    

with mlflow.start_run(experiment_id=experiment_id
                      #,run_name='DOGE'# El nombre se coloca solo la primera vez
                      ,run_id='625b17f3353949748ca9a3e1115fb9dd'
                      ) as run_parent:
    with mlflow.start_run(experiment_id=experiment_id
                          #,run_name= nombre_hijo # Acá se ponne el nombre de la ejecución
                          , run_id= 'd7751b0fc5674e0684953cecee0bfdfd'
                          ,nested=True) as run_child:
        with mlflow.start_run(experiment_id=experiment_id
        , run_name= nombre_nieto
        , nested= True) as run_grandchild:
            print(f'Run creada para {nombre_nieto} con run_id',run_grandchild.info.run_id)
        print(f'Run creada para {nombre_hijo} con run_id',run_child.info.run_id)
        

Run creada para ST_T_LOGATIRMICA con run_id 8d83f944ccaf441aa6b8714f17f976d8
🏃 View run ST_T_LOGATIRMICA at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/8d83f944ccaf441aa6b8714f17f976d8
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241
Run creada para LSTM con run_id d7751b0fc5674e0684953cecee0bfdfd
🏃 View run LSTM at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/d7751b0fc5674e0684953cecee0bfdfd
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241
🏃 View run DOGE at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/625b17f3353949748ca9a3e1115fb9dd
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241


In [296]:
# Ejemplo
#mlflow.log_artifact(wave_img_path,run_id=run_child.info.run_id,artifact_path='resultados')

# Entrenamiento

## Preparacion de la data

In [297]:


# Reshape para que sea compatible con LSTM (samples, time steps, features)
#X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
#X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


## Creando modelo

In [298]:
#capas =  #[('LSTM',100),('LSTM',50),('LSTM',25),('DENSE',1)]
#fn_activation_by_layer = #['relu','relu','relu','relu']
#epochs = 10
#batch_size = 16
#loss_metric = 'mse'
#model_metrics = ['mae']
#model_result_path = 'model_results.html'
#optimizer = 'adam'

In [299]:
model_result_path = 'model_results.html'

In [301]:
capas = [('LSTM',50),('LSTM',100),('LSTM',150),('DENSE',1)]
#[('LSTM',100),('LSTM',50),('LSTM',25),('DENSE',1)] #[('LSTM',100),('LSTM',50),('LSTM',25),('DENSE',1)]
epochs = 50
batch_size = 16
loss_metric = 'mse'
model_metrics = ['mae']
optimizer = 'adam'

In [302]:
len(capas)

4

In [303]:
# Limpiamos el grafo computacional
clear_session()

# Crear el modelo secuencial
model = Sequential()

# Construcción dinámica de la red neuronal
for i, (tipo, unidades) in enumerate(capas):
    if tipo == 'LSTM':
        # Primera capa LSTM necesita input_shape
        if i == 0:
            model.add(LSTM(units=unidades,activation='tanh', return_sequences=True, input_shape=(window_size, df_clean.shape[1]-1)))
        # Última capa LSTM antes de Dense no necesita return_sequences
        elif i == len(capas) - 2:
            model.add(LSTM(units=unidades, activation='tanh',return_sequences=False))
        else:
            model.add(LSTM(units=unidades,activation='tanh', return_sequences=True))

    elif tipo == 'DENSE':  # Capa densa final
        model.add(Dense(units=unidades, activation='linear'))

# Compilar el modelo
model.compile(optimizer=optimizer, loss=loss_metric, metrics=model_metrics)

# Mostrar el resumen del modelo
model.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [304]:
#Entrenando el modelo
history = model.fit(X_train,
                    y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_data=(x_val, y_val),
                    verbose=1
                    )

Epoch 1/50
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 23ms/step - loss: 0.1737 - mae: 0.1733 - val_loss: 0.0107 - val_mae: 0.0799
Epoch 2/50
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 22ms/step - loss: 8.5989e-04 - mae: 0.0219 - val_loss: 0.0061 - val_mae: 0.0616
Epoch 3/50
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 23ms/step - loss: 0.0011 - mae: 0.0247 - val_loss: 0.0043 - val_mae: 0.0470
Epoch 4/50
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 23ms/step - loss: 0.0010 - mae: 0.0242 - val_loss: 0.0026 - val_mae: 0.0360
Epoch 5/50
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 23ms/step - loss: 7.9545e-04 - mae: 0.0213 - val_loss: 0.0038 - val_mae: 0.0514
Epoch 6/50
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 23ms/step - loss: 8.9559e-04 - mae: 0.0229 - val_loss: 0.0015 - val_mae: 0.0287
Epoch 7/50
[1m690/690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [305]:
# Guardando los parametros en la ejecucion 
parameters = {'topology': capas
              ,'data_standarizada':False
              ,'puntos_entrenamiento':X_train.shape[0]
              ,'puntos_validacion':x_val.shape[0]
              ,'puntos_testeo':x_test.shape[0]
              #,'fn_activacion_por_capa':fn_activation_by_layer   # OJO, vamos a usar las funciones de activación por defecto
              ,'window_size':window_size
              ,'loss_metrics':loss_metric
              ,'model_metrics':model_metrics
              ,'epochs':epochs
              ,'batch_size':batch_size
              ,'optimizer':optimizer}

mlflow.log_params(params=parameters
                  ,run_id=run_grandchild.info.run_id)

In [306]:
#Obteniendo las prediccciones del modelo
train_pred = model.predict(X_train)
val_pred = model.predict(x_val)
test_pred = model.predict(x_test)

[1m345/345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


In [307]:
#df_clean.closex1M = scaler.inverse_transform(df_clean.closex1M.to_numpy().reshape(-1,1))

df_clean.closex1M  = np.expm1(df['closex1M'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [308]:
df_clean.closex1M.shape

(15790,)

In [309]:
test_pred.shape

(473, 1)

In [310]:
#train_pred = scaler.inverse_transform(train_pred.reshape(-1,1))#.reshape(-1)
#val_pred = scaler.inverse_transform(val_pred.reshape(-1,1))#.reshape(-1)
#test_pred = scaler.inverse_transform(test_pred.reshape(-1,1))#.reshape(-1)

In [311]:
train_pred = np.expm1(train_pred.reshape(-1,1))#.reshape(-1)
val_pred = np.expm1(val_pred.reshape(-1,1))#.reshape(-1)
test_pred = np.expm1(test_pred.reshape(-1,1))#.reshape(-1)

In [312]:
train_test = {'train':train_pred,'test':test_pred}

fig = go.Figure()

dates = df_clean['Close time_date']

date_train = dates[25:len(train_pred)]
date_val = dates[len(train_pred)+25:len(train_pred)+len(val_pred)+25]
date_test = dates[len(train_pred)+len(val_pred)+25:]


fig.add_trace(go.Scatter(x=dates
                            ,y=df_clean.closex1M
                            ,mode='lines'
                            ,name='real'
                            ,line = dict(color='green')))

fig.add_trace(go.Scatter(x=date_train
                            ,y=train_pred.reshape(-1)
                            ,mode='lines'
                            ,name='train'   
                            ,line=dict(color='blue')))

fig.add_trace(go.Scatter(x=date_val
                            ,y=val_pred.reshape(-1)
                            ,mode='lines'
                            ,name='val'
                            ,line=dict(color='magenta')))

fig.add_trace(go.Scatter(x=date_test
                            ,y=test_pred.reshape(-1)
                            ,mode='lines'
                            ,name='test'
                            ,line= dict(color='red')))


fig.write_html(model_result_path)
    
fig.show()



In [313]:
#Guardando la imagen del resultado del modelo
mlflow.log_artifact(model_result_path,run_id=run_grandchild.info.run_id,artifact_path='resultados')

# Guardando metricas

In [314]:
y_test.shape

(473, 1)

In [315]:
test_pred.shape

(473, 1)

In [316]:
'''
mae_test = mean_absolute_error(y_true= scaler.inverse_transform(y_test), y_pred=test_pred)
mae_train = mean_absolute_error(y_true= scaler.inverse_transform(y_train), y_pred=train_pred.reshape(-1))
mae_val = mean_absolute_error(y_true= scaler.inverse_transform(y_val), y_pred=val_pred.reshape(-1))


print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

metrics = {'mae_train':mae_train,'mae_val':mae_val,'mae_test':mae_test}'''

'\nmae_test = mean_absolute_error(y_true= scaler.inverse_transform(y_test), y_pred=test_pred)\nmae_train = mean_absolute_error(y_true= scaler.inverse_transform(y_train), y_pred=train_pred.reshape(-1))\nmae_val = mean_absolute_error(y_true= scaler.inverse_transform(y_val), y_pred=val_pred.reshape(-1))\n\n\nprint(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")\n\nmetrics = {\'mae_train\':mae_train,\'mae_val\':mae_val,\'mae_test\':mae_test}'

In [317]:

mae_test = mean_absolute_error(y_true= np.expm1(y_test), y_pred=test_pred)
mae_train = mean_absolute_error(y_true= np.expm1(y_train), y_pred=train_pred.reshape(-1))
mae_val = mean_absolute_error(y_true= np.expm1(y_val), y_pred=val_pred.reshape(-1))


print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

metrics = {'mae_train':mae_train,'mae_val':mae_val,'mae_test':mae_test}

MAE train: 0.08719135685699834,   MAE val: 0.8618944106743822,  MAE test: 1.5591612004931332


In [149]:
'''
mae_test = mean_absolute_error(y_true= y_test, y_pred=test_pred)
mae_train = mean_absolute_error(y_true= y_train, y_pred=train_pred.reshape(-1))
mae_val = mean_absolute_error(y_true= y_val, y_pred=val_pred.reshape(-1))


print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

metrics = {'mae_train':mae_train,'mae_val':mae_val,'mae_test':mae_test}'''

MAE train: 0.002270199128773162,   MAE val: 0.01394352112259415,  MAE test: 0.024653762954151486


In [318]:
mlflow.log_metrics(metrics=metrics,run_id=run_grandchild.info.run_id)

In [217]:
#mlflow.set_tags