In [1]:
import mlflow
from mlflow.exceptions import RestException
import pandas as pd
from utils import common_functions 
import numpy as np

import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
import keras.models
from tensorflow.keras.layers import LeakyReLU
import os 
import math

In [2]:
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

#Creacion o identificacion del experimento 
try:
    experiment_name = 'INV_WV_PLUS_MODELS' #Puede ser cualquiera siempre y cuando no se troque con otro
    experiment_id = (mlflow
                        .create_experiment(name=experiment_name
                                            ,tags={'created_by':'Victor Moreno'})) #importante poner el nombre de quien lo crea
except RestException as r:
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    print('Full name',experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'INV_WV_PLUS_MODELS' already exists.
Full name INV_WV_PLUS_MODELS


In [3]:
def load_model(run_id):
    loaded_model = f"runs:/{run_id}/model"
    try:
        loaded_model = mlflow.pyfunc.load_model(loaded_model)
        model = loaded_model.get_raw_model()
    except ValueError as e:
        mlflow.artifacts.download_artifacts(artifact_uri=f'runs:/{run_id}/model/data/model.keras',dst_path='.')
        model = keras.models.load_model('model.keras',custom_objects={'LeakyReLU':LeakyReLU})
        os.remove('model.keras')
    return model

In [4]:
df = pd.read_csv('data/datos_PEPEUSDT.csv',header=0)

#multipling by 1M the close data
df['closex1M'] = df['Close']*1000000 #PEPE
#df['closex1M'] = df['Close']*100 #DOGE

select = ['Close time_date','closex1M']
df_clean = df[select]

In [6]:
get_wt_coeff_inv = common_functions().get_wt_coeff_inv
plot_inv_wv = common_functions().plot_inv_wv
create_sequences = common_functions().create_sequences


In [7]:
n = df_clean.shape[0] #Cantidad de puntos a tratar
data = np.array(df_clean['closex1M'][:n]) #valores de la serie temporal
dates = df_clean['Close time_date'][:n] #valores de las fechas

In [8]:
def load_inv_coeff(wavelet):
    #llamando la funcion de get_wt_coeff_inv para obtener 
    ## Coeficientes de wavelet y la senal resconstruida desde estos
    coeffs_lv3, inv_coeffs_lv3 = get_wt_coeff_inv(signal=data
                                        ,wavelet=wavelet
                                        ,level=3
                                        ,mode='symmetric'
                                        ,take=n)

    coeffs_lv1, inv_coeffs_lv1 = get_wt_coeff_inv(signal=data
                                        ,wavelet=wavelet#'db1'
                                        ,level=1
                                        ,mode='symmetric'
                                        ,take=n)


    coeffs_lv2, inv_coeffs_lv2 = get_wt_coeff_inv(signal=data
                                        ,wavelet=wavelet
                                        ,level=2
                                        ,mode='symmetric'
                                        ,take=n)

    coeffs_lv4, inv_coeffs_lv4 = get_wt_coeff_inv(signal=data
                                        ,wavelet=wavelet
                                        ,level=4
                                        ,mode='symmetric'
                                        ,take=n)
    output = {'coeffs_lv1':coeffs_lv1,
              'inv_coeffs_lv1':inv_coeffs_lv1,
              'coeffs_lv2':coeffs_lv2,
              'inv_coeffs_lv2':inv_coeffs_lv2,
              'coeffs_lv3':coeffs_lv3,
              'inv_coeffs_lv3':inv_coeffs_lv3,
              'coeffs_lv4':coeffs_lv4,
              'inv_coeffs_lv4':inv_coeffs_lv4}
    return output

In [None]:
y_traina, y_vala = train_test_split(df_clean,test_size=0.3,shuffle=False)
y_vala, y_testa = train_test_split(y_vala,test_size=0.1,shuffle=False)
y_testa = y_testa['closex1M'].values
y_testa = y_testa[:-1]
y_testa.shape




# UPDATING THE MAE

## LSTM

In [None]:
parent_runid = '032cc4d2e33241fcbe739bcd55ea97df'

In [None]:
df_runs = mlflow.search_runs(experiment_ids=experiment_id)
df_runs = df_runs[df_runs['tags.mlflow.parentRunId']==parent_runid]
for run_id in df_runs['run_id'].values:
    df_run = df_runs[df_runs['run_id']==run_id].head()
    wavelet = df_run['params.wavelet'].values[0]
    wavelet = wavelet if wavelet!= None else 'db1'
    aproximacion = df_run['params.CoeficienteAproximacion'].values[0]
    detalle = df_run['params.CoeficienteDetalle'].values[0]
    print(run_id)

    #cargando modelo 
    model = load_model(run_id)
    inv_coeffs= load_inv_coeff(wavelet)

    print(f'inv_coeffs_lv{aproximacion[-1]}',f'inv_coeffs_lv{detalle[-1]}')
    df_train = pd.DataFrame({aproximacion:inv_coeffs[f'inv_coeffs_lv{aproximacion[-1]}'][aproximacion]
                             ,detalle:inv_coeffs[f'inv_coeffs_lv{detalle[-1]}'][detalle]})
    
    window_size = 24
    
    X, Y = create_sequences(df_train,window_size=window_size,target_col=aproximacion)

    #Espliteando la data
    X_train, X_vt, y_train, y_vt = train_test_split(X, Y, test_size=0.3, shuffle=False)

    x_val, x_test, y_val, y_test = train_test_split(X_vt,y_vt,test_size=0.1, shuffle=False)

    #Obteniendo las prediccciones del modelo
    train_pred = model.predict(X_train).reshape(-1)
    val_pred = model.predict(x_val).reshape(-1)
    test_pred = model.predict(x_test).reshape(-1)

    rmse_test = root_mean_squared_error(y_true=y_test,y_pred=test_pred)
    rmse_train = root_mean_squared_error(y_true=y_train, y_pred=train_pred)
    rmse_val = root_mean_squared_error(y_true=y_val, y_pred=val_pred)

    print(f"RMSE train: {rmse_train},   RMSE val: {rmse_val},  RMSE test: {rmse_test}\n")

    metrics = {'rmse_train':rmse_train,'rmse_val':rmse_val,'rmse_test':rmse_test}
    del model
    del train_pred, val_pred, test_pred


    #for metric,value in metrics.items():
    #    mlflow.log_metric(metric,value,run_id=run_id)
    

## Prophet

In [9]:
parent_runid = '6d1b256d8a7144a89146e017c0a7a178'

In [10]:
df_runs = mlflow.search_runs(experiment_ids=experiment_id)
df_runs = df_runs[df_runs['tags.mlflow.parentRunId']==parent_runid]
for run_id in df_runs['run_id'].values:
    df_run = df_runs[df_runs['run_id']==run_id].head()
    wavelet = df_run['params.wavelet'].values[0]
    wavelet = wavelet if wavelet!= None else 'db1'
    aproximacion = df_run['params.CoeficienteAproximacion'].values[0]
    detalle = df_run['params.CoeficienteDetalle'].values[0]
    print(run_id)

    #cargando modelo 
    model = load_model(run_id)
    inv_coeffs= load_inv_coeff(wavelet)

    print(f'inv_coeffs_lv{aproximacion[-1]}',f'inv_coeffs_lv{detalle[-1]}')
    df_train = pd.DataFrame({'ds':df_clean['Close time_date']
                            ,'y':inv_coeffs[f'inv_coeffs_lv{aproximacion[-1]}'][aproximacion]
                             ,detalle:inv_coeffs[f'inv_coeffs_lv{detalle[-1]}'][detalle]})

    def split_data(df,partitions,method='points'):
        df_ = df.copy()
        if method == 'percent':
            num_points = df_.shape[0]
            up_limit_train = math.ceil(num_points*partitions[0])
            #up_limit_test = math.ceil(up_limit_train + num_points*partitions[1])
        else:
            up_limit_train = partitions[0]

        df_train = df_.iloc[:up_limit_train]
        df_test = df_.iloc[up_limit_train:]

        return df_train,df_test

    df_train,df_test = split_data(df_train,[14861,None],'points') 
    y_test = df_clean['closex1M'].iloc[df_train.shape[0]:]
    y_train = df_clean['closex1M'].iloc[:df_train.shape[0]]
    

    train_pred_df = model.predict(df_train)
    test_pred_df = model.predict(df_test)
    train_pred = train_pred_df['yhat']
    test_pred = test_pred_df['yhat']

    rmse_test = root_mean_squared_error(y_true=y_test,y_pred=test_pred)
    rmse_train = root_mean_squared_error(y_true=y_train, y_pred=train_pred)

    print(f"RMSE train: {rmse_train},  RMSE test: {rmse_test}\n")

    metrics = {'rmse_train':rmse_train,'rmse_test':rmse_test}
    del model
    del train_pred, test_pred


    for metric,value in metrics.items():
        mlflow.log_metric(metric,value,run_id=run_id)
    

e28ef98ca1834900b39a1a78558259ed


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 5/5 [00:01<00:00,  4.95it/s]


inv_coeffs_lv1 inv_coeffs_lv3
RMSE train: 1.649180610032769,  RMSE test: 9.535860132898161

76bf3b9f925c463cb52d7236beb184c9


Downloading artifacts: 100%|██████████| 5/5 [00:01<00:00,  4.14it/s]


inv_coeffs_lv1 inv_coeffs_lv3
RMSE train: 1.649180610032769,  RMSE test: 9.535860132898161

d22bc88b9bf746898e6f88c8f26a24ad


Downloading artifacts: 100%|██████████| 5/5 [00:01<00:00,  4.73it/s]


inv_coeffs_lv1 inv_coeffs_lv3
RMSE train: 1.649180610032769,  RMSE test: 9.535860132898161

6f58ca16606d4ac4b1e0183473a834a0


Downloading artifacts: 100%|██████████| 5/5 [00:01<00:00,  4.45it/s]


inv_coeffs_lv1 inv_coeffs_lv3
RMSE train: 1.6490008995666414,  RMSE test: 9.5490083988102

eafac46063924b5b92899ea7b1569bfd


Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


inv_coeffs_lv3 inv_coeffs_lv1
RMSE train: 1.6500173285183268,  RMSE test: 9.541440123521033

6e6af7b9640c4de8b645277b8e3bcb27


Downloading artifacts: 100%|██████████| 5/5 [00:01<00:00,  4.87it/s]


inv_coeffs_lv3 inv_coeffs_lv1
RMSE train: 1.6793379999764486,  RMSE test: 9.534569554732546

e4333e7e699948e7a66b8e86595e75f1


Downloading artifacts: 100%|██████████| 5/5 [00:01<00:00,  4.82it/s]


inv_coeffs_lv3 inv_coeffs_lv1
RMSE train: 1.6793379999764486,  RMSE test: 9.534569554732546



# ADDING THE REAL MAE

## LSTM

In [None]:
parent_runid = '032cc4d2e33241fcbe739bcd55ea97df'

(Index(['Close time_date', 'closex1M'], dtype='object'),
 Index(['cA3', 'cD1'], dtype='object'))

In [None]:
df_runs = mlflow.search_runs(experiment_ids=experiment_id)
df_runs = df_runs[df_runs['tags.mlflow.parentRunId']==parent_runid]
for run_id in df_runs['run_id'].values:
    df_run = df_runs[df_runs['run_id']==run_id].head()
    wavelet = df_run['params.wavelet'].values[0]
    wavelet = wavelet if wavelet!= None else 'db1'
    aproximacion = df_run['params.CoeficienteAproximacion'].values[0]
    detalle = df_run['params.CoeficienteDetalle'].values[0]
    print(run_id)

    #cargando modelo 
    model = load_model(run_id)
    inv_coeffs= load_inv_coeff(wavelet)

    print(f'inv_coeffs_lv{aproximacion[-1]}',f'inv_coeffs_lv{detalle[-1]}')
    df_train = pd.DataFrame({aproximacion:inv_coeffs[f'inv_coeffs_lv{aproximacion[-1]}'][aproximacion]
                             ,detalle:inv_coeffs[f'inv_coeffs_lv{detalle[-1]}'][detalle]})
    
    window_size = 24
    
    X, Y = create_sequences(df_clean,window_size=window_size,target_col=aproximacion)

    #Espliteando la data
    X_train, X_vt, y_train, y_vt = train_test_split(X, Y, test_size=0.3, shuffle=False)

    x_val, x_test, y_val, y_test = train_test_split(X_vt,y_vt,test_size=0.1, shuffle=False)

    y_train_r = df_clean.iloc[:len(y_train)]
    y_val_r = df_clean.iloc[len(y_train):len(y_train)+len(y_val)]
    y_test_r = df_clean.iloc[len(y_train)+len(y_val):]

    print(f"y_train: {len(y_train)} ---- y_train_r: {len(y_train_r)}")
    print(f"y_val: {len(y_val)} ---- y_val_r: {len(y_val_r)}")
    print(f"y_test: {len(y_test)} ---- y_test_r: {len(y_test_r)}")

    #Obteniendo las prediccciones del modelo
    train_pred = model.predict(X_train).reshape(-1)
    val_pred = model.predict(x_val).reshape(-1)
    test_pred = model.predict(x_test).reshape(-1)

    rmse_test = root_mean_squared_error(y_true=y_test,y_pred=test_pred)
    rmse_train = root_mean_squared_error(y_true=y_train, y_pred=train_pred)
    rmse_val = root_mean_squared_error(y_true=y_val, y_pred=val_pred)

    print(f"RMSE_real train: {rmse_train},   RMSE_real val: {rmse_val},  RMSE_real test: {rmse_test}\n")

    metrics = {'real_rmse_train':rmse_train,'real_rmse_val':rmse_val,'real_rmse_test':rmse_test}
    del model
    del train_pred, val_pred, test_pred


    #for metric,value in metrics.items():
    #    mlflow.log_metric(metric,value,run_id=run_id)
    

00d56ebd83334d37b2818c233e04d1a4


Downloading artifacts: 100%|██████████| 6/6 [00:00<00:00,  6.91it/s]   


inv_coeffs_lv3 inv_coeffs_lv1


KeyError: 'cA3'