# Importing Libraries

In [1]:
import pandas as pd
from utils import common_functions 
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error
import mlflow
from mlflow.exceptions import RestException

# Iniciando funciones

In [9]:
# Instantiate the shared helper object and keep a module-level handle on its
# create_sequences method, which later cells call to build (X, Y).
helpers = common_functions()
create_sequences = helpers.create_sequences

# Leyendo la data y limpiándola

In [10]:
# Location of the raw CSV, relative to the notebook's working directory.
path_data = '../sandbox/pepe.csv'

# The first row of the file is used as the column header.
df = pd.read_csv(path_data, header=0)

In [11]:
# Cleaning the data

# Parse "Open time" and "Close time" as millisecond epochs and store each one,
# formatted as an ISO 8601 string, in a sibling "<column>_date" column.
for time_col in ('Open time', 'Close time'):
    parsed = pd.to_datetime(df[time_col], unit='ms')
    df[f'{time_col}_date'] = parsed.apply(lambda ts: ts.isoformat())

# Scaled copy of the "Close" field (multiplied by one million).
df['closex1M'] = df['Close'] * 1_000_000

# TODO: normalize the data and remove this transformation.


In [12]:
# Keep only the columns the rest of the notebook needs:
# the ISO-formatted close timestamp and the close value.
select = ['Close time_date', 'Close']
df_clean = df.loc[:, select]

# Conexión al servidor de MLflow
También se crean las ejecuciones (runs) necesarias.


In [13]:
# Point the MLflow client at the remote tracking server.
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

# Any name works as long as it does not clash with an unrelated experiment.
experiment_name = 'SERIET_PLUS_MODELADO'

# Create the experiment, or reuse it when the server reports that it already exists.
try:
    experiment_id = mlflow.create_experiment(
        name=experiment_name,
        tags={'created_by': 'Juan Carlos Cabrera'},  # important: record who created it
    )
except RestException as r:
    # Only "already exists" is recoverable here. Any other REST failure
    # (authentication, connectivity, malformed request, ...) must surface
    # instead of being mistaken for an existing experiment.
    if r.error_code != 'RESOURCE_ALREADY_EXISTS':
        raise
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        # The server claims the name is taken but the lookup found nothing;
        # fail with a clear message rather than an AttributeError on None.
        raise RuntimeError(
            f"Experiment '{experiment_name}' is reported as existing but could not be retrieved"
        ) from r
    print('Full name',experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'SERIET_PLUS_MODELADO' already exists.
Full name SERIET_PLUS_MODELADO


## Creando la ejecucion

In [14]:
# Open the parent run identified by its fixed run_id and, inside it, start a
# nested child run for this variant of the experiment. Both runs are closed
# when the `with` statement exits; later cells address the child by its run_id.
with mlflow.start_run(
    experiment_id=experiment_id,
    # run_name='LSTM',  # the name is only supplied the first time
    run_id='e0b18f5efa104f5184b5df9a61728da1',
) as run_parent, mlflow.start_run(
    experiment_id=experiment_id,
    run_name='SERIE TEMPORAL SIN PREPROCESADO',  # name of this execution
    nested=True,
) as run_child:
    print('Run creada para SERIE TEMPORAL SIN PREPROCESADO con run_id',run_child.info.run_id)

Run creada para SERIE TEMPORAL SIN PREPROCESADO con run_id a5e84c39be0b4911958033e53a8d9674
🏃 View run SERIE TEMPORAL SIN PREPROCESADO at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/a5e84c39be0b4911958033e53a8d9674
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241
🏃 View run LSTM at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/e0b18f5efa104f5184b5df9a61728da1
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241


In [10]:
# Example (left commented out): attach a local file to the child run,
# storing it under the 'resultados' artifact folder.
#mlflow.log_artifact(wave_img_path,run_id=run_child.info.run_id,artifact_path='resultados')

# Entrenamiento

## Preparación de la data

In [31]:
# Work on an independent copy that holds only the target column.
df_train = df_clean[['Close']].copy()

# Window length passed to create_sequences.
window_size = 20
X, Y = create_sequences(df_train, window_size=window_size, target_col='Close')

# Flatten X onto its first two axes (presumably (samples, window_size) --
# confirm against create_sequences) and turn Y into a single-column array.
n_samples, n_steps = X.shape[:2]
X = X.reshape(n_samples, n_steps)
Y = Y.reshape((-1, 1))

# Splits without shuffling, so row order is preserved:
# 70% train, then the remaining 30% is divided 90/10 into validation and test.
X_train, X_vt, y_train, y_vt = train_test_split(X, Y, test_size=0.3, shuffle=False)
x_val, x_test, y_val, y_test = train_test_split(X_vt, y_vt, test_size=0.1, shuffle=False)

# Explicit reshape to the LSTM layout (samples, time steps, features) -- left disabled.
#X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
#X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


## Creando modelo

In [17]:
# Training configuration shared by the model, plotting and MLflow logging cells.

# One activation per layer, in model order (three LSTM layers, then the Dense output).
# NOTE(review): the last entry is applied to the output layer, and the recorded
# training log shows val_loss not changing between epochs -- confirm that a
# ReLU output is intended for this regression.
fn_activation_by_layer = ['relu'] * 4

epochs = 10
batch_size = 16
optimizer = 'adam'

loss_metric = 'mse'
model_metrics = ['mae']

# File the results figure is written to before being logged as an artifact.
model_result_path = 'model_results.html'

In [33]:
# Stacked LSTM regressor: three recurrent layers (100 -> 50 -> 25 units)
# followed by a single-unit Dense output. Only the last LSTM collapses the
# sequence (return_sequences=False).
lstm_specs = [(100, True), (50, True), (25, False)]

layer_stack = [Input(shape=(window_size, 1))]
layer_stack += [
    LSTM(units, activation=activation, return_sequences=keep_sequence)
    for (units, keep_sequence), activation in zip(lstm_specs, fn_activation_by_layer)
]
layer_stack.append(Dense(1, activation=fn_activation_by_layer[3]))

model = Sequential(layer_stack)
model.compile(optimizer=optimizer, loss=loss_metric, metrics=model_metrics)

# Train the model, evaluating on the validation split after every epoch.
fit_options = dict(
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_val, y_val),
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 22ms/step - loss: 3.3678e-11 - mae: 3.9350e-06 - val_loss: 2.0503e-10 - val_mae: 1.3115e-05
Epoch 2/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 21ms/step - loss: 3.3451e-11 - mae: 3.9188e-06 - val_loss: 2.0503e-10 - val_mae: 1.3115e-05
Epoch 3/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 21ms/step - loss: 3.3891e-11 - mae: 3.9434e-06 - val_loss: 2.0503e-10 - val_mae: 1.3115e-05
Epoch 4/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 21ms/step - loss: 3.5007e-11 - mae: 4.0193e-06 - val_loss: 2.0503e-10 - val_mae: 1.3115e-05
Epoch 5/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 21ms/step - loss: 3.3577e-11 - mae: 3.9263e-06 - val_loss: 2.0503e-10 - val_mae: 1.3115e-05
Epoch 6/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 22ms/step - loss: 3.3544e-11 - mae: 3.9431e-06 - val_loss: 2.0503e-10 - 

In [34]:
# Guardando los parametros en la ejecucion 
parameters = {'data_standarizada':False
              ,'puntos_entrenamiento':X_train.shape[0]
              ,'puntos_validacion':x_val.shape[0]
              ,'puntos_testeo':x_test.shape[0]
              ,'fn_activacion_por_capa':fn_activation_by_layer
              ,'window_size':window_size
              ,'loss_metrics':loss_metric
              ,'model_metrics':model_metrics
              ,'epochs':epochs
              ,'batch_size':batch_size
              ,'optimizer':optimizer}

mlflow.log_params(params=parameters
                  ,run_id=run_child.info.run_id)

In [35]:
#Obteniendo las prediccciones del modelo
train_pred = model.predict(X_train).reshape(-1)
val_pred = model.predict(x_val).reshape(-1)
test_pred = model.predict(x_test).reshape(-1)

[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


In [56]:
# Scratch check: total number of predictions over the three splits, minus 20.
sum(len(preds) for preds in (test_pred, train_pred, val_pred)) - 20

15300

In [59]:
# Peek at rows 1 and 2 (positional) of the cleaned frame, all columns.
df_clean.iloc[1:3]

Unnamed: 0,Close time_date,Close
1,2023-05-05T19:59:59.999000,3e-06
2,2023-05-05T20:59:59.999000,4e-06


In [62]:
# Kept for backward compatibility; not used by the plot below.
train_test = {'train':train_pred,'test':test_pred}

dates = df_clean['Close time_date']

# Align every prediction with the timestamp of the row it predicts.
# Assumes create_sequences emits its first target at row `window_size`
# (prediction i <-> row i + window_size) -- TODO confirm against the helper.
# The train slice previously ended at len(train_pred), making it `window_size`
# entries shorter than train_pred; it must end where the validation slice starts.
train_end = window_size + len(train_pred)
val_end = train_end + len(val_pred)

date_train = dates.iloc[window_size:train_end]
date_val = dates.iloc[train_end:val_end]
date_test = dates.iloc[val_end:]

fig = go.Figure()

# (legend name, x values, y values, line colour) for each curve, drawn in this order.
traces = (
    ('real', dates, df_clean.Close, 'green'),
    ('train', date_train, train_pred, 'blue'),
    ('val', date_val, val_pred, 'magenta'),
    ('test', date_test, test_pred, 'red'),
)
for trace_name, x_values, y_values, color in traces:
    fig.add_trace(go.Scatter(x=x_values
                                ,y=y_values
                                ,mode='lines'
                                ,name=trace_name
                                ,line=dict(color=color)))

# A figure should stand on its own when the notebook is skimmed.
fig.update_layout(title='Close: real vs. predicted'
                  ,xaxis_title='Close time'
                  ,yaxis_title='Close')

# Persist the interactive figure so it can be logged as an MLflow artifact.
fig.write_html(model_result_path)

fig.show()



In [39]:
# Upload the saved results figure to the child run, under the 'resultados' folder.
mlflow.log_artifact(
    local_path=model_result_path,
    artifact_path='resultados',
    run_id=run_child.info.run_id,
)

# Guardando metricas

In [40]:

# Mean absolute error of the predictions on each split.
mae_train = mean_absolute_error(y_train, train_pred)
mae_val = mean_absolute_error(y_val, val_pred)
mae_test = mean_absolute_error(y_test, test_pred)

print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

# Collected under the names they are logged with in MLflow.
metrics = dict(mae_train=mae_train, mae_val=mae_val, mae_test=mae_test)

MAE train: 3.971216896680344e-06,   MAE val: 1.3114540618955512e-05,  MAE test: 1.5266478260869566e-05


In [41]:
# Store the per-split MAE values on the child run.
mlflow.log_metrics(metrics, run_id=run_child.info.run_id)

In [21]:
# TODO(review): placeholder for tagging the run; nothing is executed here yet.
#mlflow.set_tags