# Importing Libraries

In [1]:
import math
from datetime import datetime

import mlflow
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from mlflow.exceptions import RestException
from prophet import Prophet
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from utils import common_functions

  from .autonotebook import tqdm as notebook_tqdm


# Iniciando funciones

In [2]:
# Bind the project helpers to short module-level names used by the cells below.
# NOTE(review): every line builds its own `common_functions()` instance;
# presumably the helpers keep no shared state — confirm.
get_wt_coeff_inv = common_functions().get_wt_coeff_inv
plot_inv_wv = common_functions().plot_inv_wv
# NOTE(review): `create_sequences` is not referenced by any visible cell.
create_sequences = common_functions().create_sequences
# Flag that enables the optional min-max scaling of the close price further down.
standarize_data = False

# Loading data

In [3]:
# Read data/datos_PEPEUSDT.csv; the first row holds the column names.
df = pd.read_csv('data/datos_PEPEUSDT.csv',header=0)

# Multiply the close price by 1M (factor used for PEPE).
df['closex1M'] = df['Close']*1000000 #PEPE
# Alternative factors kept for other assets (the column name stays 'closex1M'):
#df['closex1M'] = df['Close']*100 #DOGE
#df['closex1M'] = df['Close']*10 #xrp

In [4]:
# Columns needed downstream: the close timestamp and the rescaled close price.
select = ['Close time_date','closex1M']
#select = ['Close time_date','closeNormalized']

if standarize_data:
    # Min-max scale the close price to [0, 1] and stretch it to [0, 10].
    # MinMaxScaler comes from sklearn.preprocessing (imported in the imports cell).
    # NOTE(review): the scaler is fit on the full series, i.e. before the
    # train/test split performed later, so test-set statistics leak into training.
    scaler = MinMaxScaler()
    # fit_transform returns an (n, 1) array; flatten it before storing it as a column.
    df['closex1M'] = scaler.fit_transform(df[['closex1M']]).ravel() * 10

# Explicit copy so later operations on df_clean never act on a slice of `df`.
df_clean = df[select].copy()

# Conexión al servidor de MLflow
También se crean las ejecuciones necesarias.


In [5]:
# MLflow tracking server that receives every run logged by this notebook.
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

# Create the experiment, or reuse it when it already exists.
# Any name works as long as it does not clash with another experiment.
experiment_name = 'INV_WV_PLUS_MODELS'
try:
    experiment_id = mlflow.create_experiment(
        name=experiment_name,
        tags={'created_by':'Victor Moreno'},  # important: record who created it
    )
except RestException as r:
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        # The failure was not "experiment already exists" (nothing with this
        # name can be fetched), so surface the original server error instead
        # of failing below with an unrelated AttributeError.
        raise
    print('Full name',experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'INV_WV_PLUS_MODELS' already exists.
Full name INV_WV_PLUS_MODELS


## Creando la ejecución

Se definen las variables iniciales necesarias para las ejecuciones de más abajo.

In [6]:
# Wavelet components used by this run: an approximation and a detail coefficient.
aproximacion, detalle = 'cA1', 'cD3'

# Names for the MLflow run hierarchy: parent run, first-level child and
# second-level child (the concatenation of both coefficient names).
parent_run, child_lv1 = 'PEPE', 'PROPHET'
child_lv2 = f'{aproximacion}{detalle}'

# Settings recorded with the trained model.
model_metrics = ['mae']
loss_metric = 'mse'
model_result_path = 'model_results.html'

In [7]:
# Display the three run names as a sanity check before creating the runs.
parent_run, child_lv1, child_lv2

('PEPE', 'PROPHET', 'cA1cD3')

In [8]:
# Open the parent and first-level child runs by their fixed run ids, then
# create a new second-level child run for this coefficient combination.
# `child_run2` is kept afterwards so later cells can log to it via its run_id.
with mlflow.start_run(
    experiment_id=experiment_id,
    # run_name=parent_run,  # the name is only supplied the first time
    run_id='edbf69750c2c43a0a1b3568696584af1',
) as run_parent:
    with mlflow.start_run(
        experiment_id=experiment_id,
        run_name=child_lv1,
        run_id='6d1b256d8a7144a89146e017c0a7a178',
        nested=True,
    ) as child_run1:
        with mlflow.start_run(
            experiment_id=experiment_id,
            run_name=child_lv2,
            nested=True,
        ) as child_run2:
            print(f'Run creada para {child_lv2} con run_id',child_run2.info.run_id)
            # No explicit mlflow.end_run() here: each `with` block ends its own
            # run on exit, and an extra call inside the block would pop the
            # active-run stack out of step with the context managers.


Run creada para cA1cD3 con run_id d22bc88b9bf746898e6f88c8f26a24ad
🏃 View run cA1cD3 at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/d22bc88b9bf746898e6f88c8f26a24ad
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655
🏃 View run PROPHET at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/6d1b256d8a7144a89146e017c0a7a178
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655
🏃 View run PEPE at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/edbf69750c2c43a0a1b3568696584af1
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655


# Aplicando Transformada de Wavelet

In [9]:
# Number of points to process: every row of the cleaned frame.
n = df_clean.shape[0]
data = np.array(df_clean['closex1M'][:n])  # time-series values
dates = df_clean['Close time_date'][:n]    # matching timestamps

# Arguments shared by all decompositions; only `level` differs between calls.
wavelet_kwargs = dict(signal=data, wavelet='db1', mode='symmetric', take=n)

# Each call yields the wavelet coefficients and the signals rebuilt from them
# (per the original note on get_wt_coeff_inv). Call order is kept as before.
coeffs_lv3, inv_coeffs_lv3 = get_wt_coeff_inv(level=3, **wavelet_kwargs)
coeffs_lv2, inv_coeffs_lv2 = get_wt_coeff_inv(level=2, **wavelet_kwargs)
coeffs_lv4, inv_coeffs_lv4 = get_wt_coeff_inv(level=4, **wavelet_kwargs)
coeffs_lv1, inv_coeffs_lv1 = get_wt_coeff_inv(level=1, **wavelet_kwargs)

## Visualizando la señal reconstruida

In [10]:
# Output file for the decomposition plot.
wave_img_path = 'descomposicion_wavelet.html'

# Extra signals drawn for comparison: the real series and the level-1
# approximation selected for this run.
external_signals = dict(real=data)
external_signals[aproximacion] = inv_coeffs_lv1[aproximacion]

plot_inv_wv(
    inv_coeffs=inv_coeffs_lv3,
    date_signal=dates,
    external_signals=external_signals,
    output_path=wave_img_path,
)

In [11]:
# Attach the decomposition plot to the second-level child run, under 'resultados'.
mlflow.log_artifact(wave_img_path,run_id=child_run2.info.run_id,artifact_path='resultados')

# Entrenamiento

## Preparación de los datos

In [12]:
# Modelling frame: 'ds' = close timestamp, 'y' = selected level-1 approximation
# (the fit target), plus the selected level-3 detail component, which is
# registered as an extra regressor when the model is built.
df_train = pd.DataFrame({'ds':df_clean['Close time_date']
                        ,'y':inv_coeffs_lv1[aproximacion]
                         ,detalle:inv_coeffs_lv3[detalle]})

def split_data(df,partitions,method='points'):
    """Split ``df`` into a leading train part and a trailing test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split. It is copied first, so the caller's object is not
        modified.
    partitions : sequence
        Only ``partitions[0]`` is read. With ``method='percent'`` it is the
        fraction of rows assigned to train (rounded up with ``math.ceil``);
        with any other ``method`` it is used directly as the positional cut.
    method : str, default 'points'
        ``'percent'`` selects the fractional interpretation; every other
        value falls back to the positional one.

    Returns
    -------
    tuple
        ``(train, test)``: the rows before the cut and the rows from the cut
        onwards, selected by position.
    """
    frame = df.copy()
    cut = math.ceil(frame.shape[0] * partitions[0]) if method == 'percent' else partitions[0]
    return frame.iloc[:cut], frame.iloc[cut:]

# Positional split: the first 14861 rows become train, the remainder test.
# The second list entry is never read by split_data.
# NOTE: this rebinds `df_train` from the full frame to the train part only.
df_train,df_test = split_data(df_train,[14861,None],'points') 


## Creando modelo

In [13]:
# Prophet model with the detail component registered as an extra regressor.
changepoint_prior_scale = 30
m = Prophet(changepoint_prior_scale=changepoint_prior_scale)
m.add_regressor(detalle)
# Timestamps taken around fit() so the training time can be logged later.
ti = datetime.now()
m.fit(df_train)
tf = datetime.now()

22:52:30 - cmdstanpy - INFO - Chain [1] start processing
22:52:33 - cmdstanpy - INFO - Chain [1] done processing


In [14]:
# Store the run parameters in the second-level child run.
# Training time uses total_seconds(): `.seconds` is only the whole-seconds
# component of the timedelta, so it drops the fractional part (and any days).
training_seconds = (tf - ti).total_seconds()

parameters = {
    'data_standarizada': standarize_data,
    'CoeficienteAproximacion': aproximacion,
    'CoeficienteDetalle': detalle,
    'loss_metrics': loss_metric,
    'model_metrics': model_metrics,
    'changepoint_prior_scale': changepoint_prior_scale,
    'tiempo_entrenamiento': training_seconds,
}

mlflow.log_params(params=parameters, run_id=child_run2.info.run_id)

In [15]:
# Get the model predictions for the train and test parts.
train_pred_df = m.predict(df_train)
test_pred_df = m.predict(df_test)
# Keep only the 'yhat' column of each prediction frame.
train_pred = train_pred_df['yhat']
test_pred = test_pred_df['yhat']


In [16]:
# Timestamps of each part, used as x-axis for the prediction traces.
date_train = df_train['ds']
date_test = df_test['ds']

# (x values, y values, legend name, line colour) in drawing order.
traces = [
    (dates, data, 'real', 'green'),
    (date_train, train_pred, 'train', 'blue'),
    (date_test, test_pred, 'test', 'red'),
]

fig = go.Figure()
for xs, ys, label, colour in traces:
    fig.add_trace(go.Scatter(x=xs, y=ys, mode='lines', name=label, line=dict(color=colour)))

# Save an HTML copy of the figure, then render it in the notebook.
fig.write_html(model_result_path)
fig.show()



In [17]:
# Save the model.
# NOTE(review): the signature is inferred from plain NumPy conversions of the
# whole training frame and of the 'yhat' predictions — confirm this is the
# schema the logged model should advertise.
signature = mlflow.models.infer_signature(np.array(df_train),np.array(train_pred))
# Log the model inside the second-level child run, re-opened here by its run_id.
with mlflow.start_run(run_id=child_run2.info.run_id) as run_model:
    mlflow.prophet.log_model(m,'model',signature=signature)

🏃 View run cA1cD3 at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/d22bc88b9bf746898e6f88c8f26a24ad
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655


In [18]:
# Store the HTML figure with the model results in the run, under 'resultados'.
mlflow.log_artifact(model_result_path,run_id=child_run2.info.run_id,artifact_path='resultados')

# Guardando métricas

In [19]:
# Real close values (not the wavelet approximation), cut at the same row as
# the train/test parts so they line up with the predictions.
n_train_rows = df_train.shape[0]
real_close = df_clean['closex1M']
y_train = real_close.iloc[:n_train_rows]
y_test = real_close.iloc[n_train_rows:]

In [20]:
# Mean absolute error of the predictions against the real close values.
mae_train = mean_absolute_error(y_true=y_train, y_pred=train_pred)
mae_test = mean_absolute_error(y_true=y_test, y_pred=test_pred)

print(f"MAE train: {mae_train},  MAE test: {mae_test}")

# Metrics to be logged to MLflow.
metrics = dict(mae_train=mae_train, mae_test=mae_test)

MAE train: 0.9231558741911604,  MAE test: 9.03297248483579


In [21]:
# Log both MAE values to the second-level child run.
mlflow.log_metrics(metrics=metrics,run_id=child_run2.info.run_id)