# Importing Libraries

In [48]:
import pandas as pd
from utils import common_functions 
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error

import mlflow
from mlflow.exceptions import RestException

# Iniciando funciones

In [2]:
get_wt_coeff_inv = common_functions().get_wt_coeff_inv
plot_inv_wv = common_functions().plot_inv_wv
create_sequences = common_functions().create_sequences

# Leyendo data y limpiandola

In [3]:
path_data = f'../sandbox/pepe.csv'
df = pd.read_csv(path_data,header=0)

In [4]:
#Limpieando la data 

# Convertir "Open time" y "Close time" a datetime y darle formato ISO 8601
df['Open time_date'] = pd.to_datetime(df['Open time'], unit='ms').apply(lambda x: x.isoformat())
df['Close time_date'] = pd.to_datetime(df['Close time'], unit='ms').apply(lambda x: x.isoformat())

#Escalando el valor de el campo Close
df['closex1M'] = df['Close'] * 1000000


In [5]:
#seleccionando los campos que necesitamos
select = ['Close time_date','closex1M']
df_clean = df[select]

# Conexion a Mlflow server
tambien se crean las ejecuciones necesarias


In [50]:
remote_server_uri = "http://34.58.215.162:8080/"  # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)

#Creacion o identificacion del experimento 
try:
    experiment_name = 'INV_WV_PLUS_MODELS' #Puede ser cualquiera siempre y cuando no se troque con otro
    experiment_id = (mlflow
                        .create_experiment(name=experiment_name
                                            ,tags={'created_by':'Victor Moreno'})) #importante poner el nombre de quien lo crea
except RestException as r:
    print(r)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    print('Full name',experiment.name)
    experiment_id = experiment.experiment_id

RESOURCE_ALREADY_EXISTS: Experiment 'INV_WV_PLUS_MODELS' already exists.
Full name INV_WV_PLUS_MODELS


## Creando la ejecucion

In [51]:
with mlflow.start_run(experiment_id=experiment_id
                      #,run_name='LSTM'# El nombre se coloca solo la primera vez
                      ,run_id='42e6592c9dd141b99eec4768fea566be'
                      ) as run_parent:
    with mlflow.start_run(experiment_id=experiment_id
                          ,run_name='cA2cD4'
                          ,nested=True) as run_child:
        print('Run creada para cA2cD3 con run_id',run_child.info.run_id)

Run creada para cA2cD3 con run_id bfea640ece2641468a5e4c57e1f890b9
🏃 View run cA2cD4 at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/bfea640ece2641468a5e4c57e1f890b9
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655
🏃 View run LSTM at: http://34.58.215.162:8080/#/experiments/753905317043302655/runs/42e6592c9dd141b99eec4768fea566be
🧪 View experiment at: http://34.58.215.162:8080/#/experiments/753905317043302655


# Aplicando Transformada de Wavelet

In [6]:
n = df_clean.shape[0] #Cantidad de puntos a tratar
data = np.array(df_clean['closex1M'][:n]) #valores de la serie temporal
dates = df_clean['Close time_date'][:n] #valores de las fechas

#llamando la funcion de get_wt_coeff_inv para obtener 
## Coeficientes de wavelet y la senal resconstruida desde estos
coeffs_lv3, inv_coeffs_lv3 = get_wt_coeff_inv(signal=data
                                      ,wavelet='db1'
                                      ,level=3
                                      ,mode='symmetric'
                                      ,take=n)

coeffs_lv2, inv_coeffs_lv2 = get_wt_coeff_inv(signal=data
                                      ,wavelet='db1'
                                      ,level=2
                                      ,mode='symmetric'
                                      ,take=n)

coeffs_lv4, inv_coeffs_lv4 = get_wt_coeff_inv(signal=data
                                      ,wavelet='db1'
                                      ,level=4
                                      ,mode='symmetric'
                                      ,take=n)

## Visualizando la senal reconstruida

In [7]:
#incorporando la senal real en tiempo como externa para comparacion
external_signals = {'real':data,
                    'cA2':inv_coeffs_lv2['cA2']
                    }
plot_inv_wv(inv_coeffs=inv_coeffs_lv4
            ,date_signal=dates
            ,external_signals=external_signals)

# Entrenamiento

## Preparacion de la data

In [8]:
df_test = pd.DataFrame({'close':[1,2,3,4,5,6,7,8,9,0]
                       ,'aux1':[1,2,3,4,5,6,7,8,9,0]})
x_t, y_t = create_sequences(df_test,window_size=5,target_col='close')
x_t[0],y_t

(array([[1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 5]]),
 array([6, 7, 8, 9, 0]))

In [None]:
aproximacion = 'cA2'
detalle = 'cD4'

In [None]:
df_train = pd.DataFrame({'cA2':inv_coeffs_lv2['cA2']
                         ,'cD4':inv_coeffs_lv4['cD4']})
window_size = 20
X, Y = create_sequences(df_train,window_size=window_size,target_col='cA2')

#Espliteando la data
X_train, X_vt, y_train, y_vt = train_test_split(X, Y, test_size=0.3, shuffle=False)

x_val, x_test, y_val, y_test = train_test_split(X_vt,y_vt,test_size=0.1, shuffle=False)

# Reshape para que sea compatible con LSTM (samples, time steps, features)
#X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
#X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


(10724, 20, 2)

## Creando modelo

In [38]:
model = Sequential([
    Input(shape=(window_size,2)),
    LSTM(100,activation='relu',return_sequences=True),
    LSTM(50,activation='relu',return_sequences=True),
    LSTM(25,activation='relu',return_sequences=False),
    Dense(1)
])
model.compile(optimizer='adam',loss='mse',metrics=['mae'])

#Entrenando el modelo
history = model.fit(X_train,
                    y_train,
                    epochs=10,
                    batch_size=16,
                    validation_data=(x_val, y_val),
                    verbose=1
                    )

Epoch 1/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - loss: 5.6088 - mae: 0.6970 - val_loss: 1.0245 - val_mae: 0.5746
Epoch 2/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 0.0424 - mae: 0.1215 - val_loss: 0.2654 - val_mae: 0.2962
Epoch 3/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 13ms/step - loss: 0.0347 - mae: 0.1106 - val_loss: 0.3564 - val_mae: 0.3258
Epoch 4/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 14ms/step - loss: 0.0357 - mae: 0.1028 - val_loss: 0.7963 - val_mae: 0.4234
Epoch 5/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 13ms/step - loss: 0.0177 - mae: 0.0786 - val_loss: 1.6971 - val_mae: 0.6073
Epoch 6/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 14ms/step - loss: 0.0230 - mae: 0.0882 - val_loss: 3.4237 - val_mae: 0.8344
Epoch 7/10
[1m671/671[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 14m

In [40]:
train_pred = model.predict(X_train).reshape(-1)
val_pred = model.predict(x_val).reshape(-1)
test_pred = model.predict(x_test).reshape(-1)

[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


In [43]:
train_test = {'train':train_pred,'test':test_pred}

fig = go.Figure()

date_train = dates[:len(train_pred)]
date_val = dates[len(train_pred):len(train_pred)+len(val_pred)]
date_test = dates[len(train_pred)+len(val_pred):]
fig.add_trace(go.Scatter(x=date_train
                            ,y=train_pred
                            ,mode='lines'
                            ,name='train'))

fig.add_trace(go.Scatter(x=date_test
                            ,y=test_pred
                            ,mode='lines'
                            ,name='test'))

fig.add_trace(go.Scatter(x=dates
                            ,y=data
                            ,mode='lines'
                            ,name='real'))

fig.add_trace(go.Scatter(x=date_val
                            ,y=val_pred
                            ,mode='lines'
                            ,name='val'))
    
fig.show()



In [47]:

mae_test = mean_absolute_error(y_true=y_test, y_pred=test_pred)
mae_train = mean_absolute_error(y_true=y_train, y_pred=train_pred)
mae_val = mean_absolute_error(y_true=y_val, y_pred=val_pred)

print(f"MAE train: {mae_train},   MAE val: {mae_val},  MAE test: {mae_test}")

MAE train: 0.18680058179507175,   MAE val: 0.8097266814648739,  MAE test: 0.4643624739439557
