# Importing Libraries

In [281]:
import pandas as pd
from utils import common_functions 
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error
import mlflow
from mlflow.exceptions import RestException
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.backend import clear_session
from prophet import Prophet


# Iniciando funciones

In [119]:
# Bind the `create_sequences` attribute of a fresh `common_functions` instance
# (imported from `utils`) to a notebook-level name.
# NOTE(review): no cell visible in this notebook calls `create_sequences` --
# confirm whether this binding is still needed for the Prophet workflow.
create_sequences = common_functions().create_sequences

# Leyendo la data y limpiándola

In [418]:
# Relative location of the DOGEUSDT CSV; the first row of the file is the header.
path_data = '../SERIET_PLUS_MODELADO/datos_DOGEUSDT.csv'
df = pd.read_csv(filepath_or_buffer=path_data, header=0)

In [419]:
# Data cleaning

# Convert the millisecond-epoch "Open time" / "Close time" columns to datetimes
# and store them as ISO 8601 strings in new "*_date" columns.
iso_columns = {'Open time_date': 'Open time', 'Close time_date': 'Close time'}
for target_col, source_col in iso_columns.items():
    timestamps = pd.to_datetime(df[source_col], unit='ms')
    df[target_col] = timestamps.map(lambda ts: ts.isoformat())

# Rescale the Close price by a factor of 100.
# Original author's note: normalise the data / remove that transformation
# (presumably the x100 rescale -- confirm).
close_rescaled = df['Close'] * 100

# Min-max normalise the rescaled close into [0, 1] (feature_range=(-1, 1) is
# the alternative the original comment mentions).
# NOTE(review): the scaler is fitted on every row, including the last 473 rows
# that are later used as the test split -- confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
df['closex1M'] = scaler.fit_transform(close_rescaled.to_numpy().reshape(-1, 1))


In [420]:
#df['closex1M'] = np.log1p(df['closex1M'])
#np.log1p(df['closex1M'])

In [421]:
#seleccionando los campos que necesitamos
select = ['Close time_date','closex1M']
df_clean = df[select]

In [422]:
# Display the (rows, columns) of the selected frame.
df_clean.shape

(15790, 2)

In [423]:
# Preview the first rows of the selected frame.
df_clean.head()

Unnamed: 0,Close time_date,closex1M
0,2023-04-17T05:59:59.999000,0.076827
1,2023-04-17T06:59:59.999000,0.076348
2,2023-04-17T07:59:59.999000,0.085209
3,2023-04-17T08:59:59.999000,0.087796
4,2023-04-17T09:59:59.999000,0.088059


In [424]:
# Training split: every row except the last 473, exposed under the 'ds'
# (timestamp) and 'y' (value) column names that are passed to Prophet.
train_rows = df_clean.iloc[:-473, :]
df_train = pd.DataFrame({'ds': train_rows['Close time_date'],
                         'y': train_rows['closex1M']})

In [425]:
# Test split: the last 473 rows, with the same 'ds' / 'y' column names as the
# training frame.
test_rows = df_clean.iloc[-473:, :]
df_test = pd.DataFrame({'ds': test_rows['Close time_date'],
                        'y': test_rows['closex1M']})

# Conexión al servidor de MLflow
También se crean las ejecuciones necesarias.


In [426]:
# Point the MLflow client at the remote tracking server.
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

# Create the experiment, or reuse it when it already exists.
# Any name works as long as it does not clash with another experiment.
experiment_name = 'SERIET_PLUS_MODELADO'
try:
    # Tag the experiment with the name of whoever creates it.
    experiment_id = mlflow.create_experiment(
        name=experiment_name,
        tags={'created_by':'Juan Carlos Cabrera'},
    )
except RestException as r:
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        # The failure was not "experiment already exists" (e.g. a permission
        # or server problem): surface the original error instead of failing
        # later with an unrelated AttributeError on None.
        raise
    print('Full name',experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'SERIET_PLUS_MODELADO' already exists.
Full name SERIET_PLUS_MODELADO


## Creando la ejecución

In [427]:
# Display names used for the child and grandchild runs in the messages below.
nombre_hijo = 'PROPHET'
nombre_nieto = 'ST_NORMALIZADA'

# Open three nested MLflow runs: parent -> child -> grandchild.
# The parent and child are addressed by fixed run_id values; the grandchild is
# started with only a run_name, and its run_id is printed so it can be found
# later. `run_grandchild` is reused by later cells to log artifacts, params
# and metrics.
with mlflow.start_run(experiment_id=experiment_id
                      #,run_name='PEPE'# The name is only set the first time
                      ,run_id='625b17f3353949748ca9a3e1115fb9dd'
                      ) as run_parent:
    with mlflow.start_run(experiment_id=experiment_id
                          #,run_name= nombre_hijo # This is where the run name is set
                          , run_id= 'a0d3f93d11ea4f03a355edfa5b5ae090'
                          ,nested=True) as run_child:
        with mlflow.start_run(experiment_id=experiment_id
        , run_name= nombre_nieto
        , nested= True) as run_grandchild:
            print(f'Run creada para {nombre_nieto} con run_id',run_grandchild.info.run_id)
        print(f'Run creada para {nombre_hijo} con run_id',run_child.info.run_id)
        

Run creada para ST_NORMALIZADA con run_id 9de32cba40d84071ac61b56be20b4052
🏃 View run ST_NORMALIZADA at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/9de32cba40d84071ac61b56be20b4052
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241
Run creada para PROPHET con run_id a0d3f93d11ea4f03a355edfa5b5ae090
🏃 View run PROPHET at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/a0d3f93d11ea4f03a355edfa5b5ae090
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241
🏃 View run DOGE at: http://34.58.215.162:8080/#/experiments/509966702496361241/runs/625b17f3353949748ca9a3e1115fb9dd
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/509966702496361241


In [405]:
# Ejemplo
#mlflow.log_artifact(wave_img_path,run_id=run_child.info.run_id,artifact_path='resultados')

# Entrenamiento

## Preparacion de la data

In [361]:


# Reshape para que sea compatible con LSTM (samples, time steps, features)
#X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
#X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


## Creando modelo

In [428]:
# Value passed to Prophet's `changepoint_prior_scale` argument and logged as a
# run parameter further down.
changepoint_prior_scale = 0.5

In [429]:
# Initialise Prophet with the configured changepoint prior scale
modelo = Prophet(changepoint_prior_scale =changepoint_prior_scale)

# Fit the model on the training frame (normalised 'y' values).
# Kept as the cell's last expression, so the notebook displays the call's result.
modelo.fit(df_train)

16:04:08 - cmdstanpy - INFO - Chain [1] start processing
16:04:20 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x23e9d68d490>

In [430]:
# Predict over the training rows first (in-sample fit) and then over the
# held-out test rows; both frames carry their own 'ds' timestamps, so no
# separate future-dates frame is built here.
train_pred, test_pred = (modelo.predict(split) for split in (df_train, df_test))

In [431]:
# Map the real series and both sets of predictions back from the MinMax
# [0, 1] range to the pre-normalisation scale.

# Rebuild df_clean from the still-normalised df['closex1M'] instead of writing
# into df_clean in place: this avoids SettingWithCopyWarning and keeps the
# df_clean part of the cell idempotent (re-running it cannot inverse-transform
# already inverse-transformed values).
df_clean = df_clean.assign(
    closex1M=scaler.inverse_transform(df[['closex1M']].to_numpy()).ravel()
)

# NOTE: 'yhat' is overwritten in place, so re-run the prediction cell before
# re-running this one; otherwise the predictions are inverse-transformed twice.
train_pred['yhat'] = scaler.inverse_transform(train_pred[['yhat']].to_numpy()).ravel()
test_pred['yhat'] = scaler.inverse_transform(test_pred[['yhat']].to_numpy()).ravel()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [432]:
# File the plot cell writes the figure to, and that is later uploaded to MLflow.
model_result_path = 'model_results.html'

In [433]:
# Timestamps for the x axis: the first len(train_pred) belong to the training
# predictions, the remainder to the test predictions.
dates = df_clean['Close time_date']
n_train = len(train_pred)
date_train = dates[:n_train]
date_test = dates[n_train:]

# One line trace per series: (x values, y values, legend name, line colour).
trace_specs = (
    (dates, df_clean.closex1M, 'real', 'green'),
    (date_train, train_pred['yhat'], 'train', 'blue'),
    (date_test, test_pred['yhat'], 'test', 'red'),
)

fig = go.Figure()
for x_values, y_values, trace_name, line_color in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=x_values,
            y=y_values,
            mode='lines',
            name=trace_name,
            line=dict(color=line_color),
        )
    )

# Export the figure to HTML, then render it in the notebook.
fig.write_html(model_result_path)

fig.show()



In [434]:
# Upload the exported model-result HTML file to the grandchild run, under the
# 'resultados' artifact path.
mlflow.log_artifact(model_result_path,run_id=run_grandchild.info.run_id,artifact_path='resultados')

In [435]:
# Parameters stored on the grandchild run.
# The point counts are taken from df_train / df_test, the frames the Prophet
# model is fitted and evaluated on. The previous X_train / x_test names are
# never defined in this notebook, so the cell raised NameError on a fresh
# kernel (or logged stale values left over from another session).
parameters = {'data_standarizada':False
              ,'puntos_entrenamiento':df_train.shape[0]
              ,'puntos_testeo':df_test.shape[0]
              , 'changepoint_prior_scale':changepoint_prior_scale}

mlflow.log_params(params=parameters
                  ,run_id=run_grandchild.info.run_id)

# Guardando métricas

In [436]:
# Display the (rows, columns) of the test prediction frame.
test_pred.shape

(473, 19)

In [437]:
# Inspect the test predictions (already inverse-transformed by an earlier cell).
test_pred['yhat']

0      43.782694
1      43.787843
2      43.786931
3      43.783804
4      43.784381
         ...    
468    49.276113
469    49.298258
470    49.314855
471    49.328156
472    49.343596
Name: yhat, Length: 473, dtype: float64

In [438]:

def _y_in_original_scale(frame):
    """Return frame['y'] passed back through the fitted scaler as an (n, 1) array."""
    return scaler.inverse_transform(frame['y'].to_numpy().reshape(-1, 1))


# Mean absolute error of the (already inverse-transformed) predictions against
# the inverse-transformed targets, for the test split and then the train split.
mae_test = mean_absolute_error(y_true=_y_in_original_scale(df_test),
                               y_pred=test_pred['yhat'])
mae_train = mean_absolute_error(y_true=_y_in_original_scale(df_train),
                                y_pred=train_pred['yhat'])

print(f"MAE train: {mae_train},   MAE test: {mae_test}")

# Collected for logging to MLflow in a later cell.
metrics = dict(mae_train=mae_train, mae_test=mae_test)

MAE train: 1.6317595013443777,   MAE test: 11.367441205782308


In [373]:
# Disabled variant of the MAE cell, kept as an inert string literal (nothing in
# it is executed): it compares df_test['y'] / df_train['y'] with 'yhat'
# directly, without passing 'y' through scaler.inverse_transform.
'''
mae_test = mean_absolute_error(y_true=df_test['y'], y_pred=test_pred['yhat'])
mae_train =mean_absolute_error(y_true=df_train['y'], y_pred=train_pred['yhat'])


print(f"MAE train: {mae_train},   MAE test: {mae_test}")

metrics = {'mae_train':mae_train,'mae_test':mae_test}'''

MAE train: 1.6308520389839885,   MAE test: 11.393191154154223


In [439]:
# Store the train / test MAE values on the grandchild run.
mlflow.log_metrics(metrics=metrics,run_id=run_grandchild.info.run_id)

In [395]:
# Display the test split ('ds' timestamps and normalised 'y' values).
df_test

Unnamed: 0,ds,y
15317,2025-01-14T10:59:59.999000,0.704977
15318,2025-01-14T11:59:59.999000,0.698007
15319,2025-01-14T12:59:59.999000,0.692715
15320,2025-01-14T13:59:59.999000,0.700762
15321,2025-01-14T14:59:59.999000,0.702390
...,...,...
15785,2025-02-02T22:59:59.999000,0.497030
15786,2025-02-02T23:59:59.999000,0.504287
15787,2025-02-03T00:59:59.999000,0.472100
15788,2025-02-03T01:59:59.999000,0.411246
