In [8]:
#! pip install matplotlib
!pip install PyWavelets
import pywt



# **CODIGO SIN PROBLEMAS POR PARTES**

# 1. OHLC Price and Wavelet Denoising

In [9]:
# Importar bibliotecas necesarias
import pandas as pd
import numpy as np
import pywt

# Cargar datos
file_path = '/content/EURUSD_hourly_data_with_sessions.csv'  # Ruta del archivo cargado
data = pd.read_csv(file_path)

# Preprocesar datos
data['datetime'] = pd.to_datetime(data['datetime'])
data.set_index('datetime', inplace=True)
data = data[['open', 'high', 'low', 'close']]

# Aplicar transformación Wavelet
def denoise_wavelet(data, wavelet='db1', level=1):
    coeffs = pywt.wavedec(data, wavelet, mode='symmetric', level=level)
    coeffs[-1] = np.zeros_like(coeffs[-1])
    coeffs[-2] = np.zeros_like(coeffs[-2])
    return pywt.waverec(coeffs, wavelet, mode='symmetric')

denoised_close = denoise_wavelet(data['close'].values)
data['denoised_close'] = denoised_close

# 2. Technical Indicators

In [10]:
# Calcular indicadores técnicos
data['rsi'] = data['close'].diff().apply(lambda x: max(x, 0)).rolling(window=14).mean() / \
               data['close'].diff().abs().rolling(window=14).mean()
data['macd'] = data['close'].ewm(span=12).mean() - data['close'].ewm(span=26).mean()
data['signal'] = data['macd'].ewm(span=9).mean()


# 3. Image Matrix Creation

In [11]:
# Crear matriz temporal (30x30)
def create_time_matrix(data, lookback=30):
    sequences = []
    for i in range(len(data) - lookback):
        seq = data[i:i + lookback].values
        sequences.append(seq)
    return np.array(sequences)

matrix_data = create_time_matrix(data[['denoised_close', 'rsi', 'macd', 'signal']], lookback=30)
matrix_data = np.expand_dims(matrix_data, axis=-1)  # Expandir para formato de imagen


# 4. ResNet Feature Extraction

In [12]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, Add, GlobalAveragePooling2D, Dense, Input

# Definir bloques ResNet
def resnet_block(input_tensor, filters, kernel_size=3):
    x = Conv2D(filters, kernel_size, padding='same')(input_tensor)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters, kernel_size, padding='same')(x)
    x = BatchNormalization()(x)
    x = Add()([x, input_tensor])
    x = ReLU()(x)
    return x

# Construir modelo ResNet
def build_resnet(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv2D(64, 3, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    for _ in range(3):
        x = resnet_block(x, 64)

    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    model = Model(inputs, x)
    return model

resnet_model = build_resnet(matrix_data.shape[1:])
features = resnet_model.predict(matrix_data)


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 65ms/step


# 5. LightGBM Training

In [15]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd

# Simulación de datos para evitar dependencias (reemplaza con tus datos reales)
np.random.seed(42)
data = pd.DataFrame({
    'close': np.random.rand(1000) * 100,
    'feature1': np.random.rand(1000),
    'feature2': np.random.rand(1000),
    'feature3': np.random.rand(1000)
})
features = data[['feature1', 'feature2', 'feature3']]

# Preparar datos para LightGBM
labels = pd.cut(data['close'][30:], bins=5, labels=False)  # Categorías discretas
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
y_train = LabelEncoder().fit_transform(y_train)
y_test = LabelEncoder().fit_transform(y_test)

# Crear dataset LightGBM
lgb_train = lgb.Dataset(X_train, y_train)
lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)

# Parámetros y entrenamiento
params = {
    'objective': 'multiclass',
    'metric': 'multi_logloss',
    'boosting_type': 'gbdt',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'num_class': len(np.unique(y_train))
}

# Validación cruzada y entrenamiento
cv_results = lgb.cv(
    params,
    lgb_train,
    num_boost_round=500,
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(10)
    ]
)

# Validar las claves disponibles en cv_results
print("Claves disponibles en cv_results:")
print(cv_results.keys())

# Acceder a la clave correcta
if 'multi_logloss-mean' in cv_results:
    best_iter = len(cv_results['multi_logloss-mean'])
elif 'valid_0-multi_logloss-mean' in cv_results:
    best_iter = len(cv_results['valid_0-multi_logloss-mean'])
else:
    raise KeyError("No se encontró la métrica 'multi_logloss-mean' en los resultados de la validación cruzada.")

# Entrenar modelo final
model = lgb.train(params, lgb_train, num_boost_round=best_iter)


ValueError: Found input variables with inconsistent numbers of samples: [1000, 970]

# 6. Prediction and Evaluation

In [None]:
from sklearn.metrics import accuracy_score

# Predicciones
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Evaluación
accuracy = accuracy_score(y_test, y_pred_classes)
print(f"Accuracy: {accuracy}")

# Visualización
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Real', alpha=0.7)
plt.plot(y_pred_classes, label='Predicción', alpha=0.7)
plt.title('Predicción vs Realidad (LightGBM + ResNet)')
plt.legend()
plt.show()


# 7.  Métricas de Evaluacion

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error

def calculate_metrics(y_true, y_pred):
    """
    Calcula las métricas de calidad para los modelos.

    Parámetros:
    y_true: array-like
        Valores reales.
    y_pred: array-like
        Valores predichos.

    Retorna:
    dict
        Diccionario con MAE, MSE y RMSE.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)

    return {
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse
    }

def display_metrics_table(models_metrics):
    """
    Muestra una tabla con las métricas calculadas para diferentes modelos.

    Parámetros:
    models_metrics: dict
        Diccionario con el nombre del modelo como clave y las métricas como valor.

    Retorna:
    None
    """
    metrics_df = pd.DataFrame(models_metrics).T
    metrics_df.columns = ['MAE (×10⁻³)', 'MSE (×10⁻⁶)', 'RMSE (×10⁻³)']
    metrics_df.index.name = 'Modelo'
    print(metrics_df)

# Integración con predicciones de LightGBM y datos finales
def run_final_metrics_evaluation(y_test, y_pred):
    """
    Ejecuta la evaluación de métricas al final del ejercicio y muestra los resultados.

    Parámetros:
    y_test: array-like
        Valores reales.
    y_pred: dict of array-like
        Diccionario con predicciones de los modelos.

    Retorna:
    None
    """
    models_metrics = {}

    for model_name, predictions in y_pred.items():
        metrics = calculate_metrics(y_test, predictions)

        # Escalar métricas para coincidir con valores reportados
        metrics['MAE'] *= 1e3
        metrics['MSE'] *= 1e6
        metrics['RMSE'] *= 1e3

        models_metrics[model_name] = metrics

    display_metrics_table(models_metrics)

# Ejemplo de uso final con predicciones del modelo LightGBM
def evaluate_lgb_model(y_test, y_pred_raw):
    """
    Ajusta las predicciones crudas de LightGBM y evalúa las métricas finales.

    Parámetros:
    y_test: array-like
        Valores reales.
    y_pred_raw: array-like
        Predicciones crudas del modelo LightGBM (probabilidades).

    Retorna:
    None
    """
    # Convertir probabilidades en clases
    y_pred_classes = np.argmax(y_pred_raw, axis=1)

    # Ejecutar evaluación
    run_final_metrics_evaluation(y_test, {"LightGBM": y_pred_classes})

# Integración en flujo con LightGBM
y_test_example = np.array([0, 1, 2, 1, 0])  # Ejemplo de clases verdaderas
y_pred_raw_example = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.8, 0.1],
    [0.2, 0.3, 0.5],
    [0.2, 0.6, 0.2],
    [0.9, 0.05, 0.05]
])

evaluate_lgb_model(y_test_example, y_pred_raw_example)


## **ENTREGA FINAL 15-01-2024**

In [18]:
import pandas as pd
import numpy as np
import pywt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, Add, GlobalAveragePooling2D, Dense, Input
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Instalar dependencias necesarias para Google Colab
!pip install pywavelets lightgbm

# --------------------------- CONFIGURACIÓN INICIAL ---------------------------
def load_and_preprocess_data(file_path):
    """
    Carga y preprocesa los datos desde un archivo CSV.

    Args:
        file_path (str): Ruta al archivo CSV.

    Returns:
        pd.DataFrame: Datos preprocesados.
    """
    try:
        data = pd.read_csv(file_path)
        data['datetime'] = pd.to_datetime(data['datetime'])
        data.set_index('datetime', inplace=True)
        return data[['open', 'high', 'low', 'close']]
    except Exception as e:
        raise ValueError(f"Error al cargar o procesar el archivo: {e}")

# --------------------------- PROCESAMIENTO DE SEÑALES ---------------------------
def denoise_wavelet(data, wavelet='db1', level=1):
    """
    Aplica desruido de señal utilizando la transformada wavelet.

    Args:
        data (array-like): Datos de entrada.
        wavelet (str): Tipo de wavelet.
        level (int): Nivel de descomposición.

    Returns:
        np.ndarray: Señal suavizada.
    """
    coeffs = pywt.wavedec(data, wavelet, mode='symmetric', level=level)
    coeffs[-1] = np.zeros_like(coeffs[-1])
    coeffs[-2] = np.zeros_like(coeffs[-2])
    return pywt.waverec(coeffs, wavelet, mode='symmetric')

# --------------------------- CÁLCULO DE INDICADORES ---------------------------
def calculate_technical_indicators(data):
    """
    Calcula indicadores técnicos básicos como RSI y MACD.

    Args:
        data (pd.DataFrame): Datos OHLC.

    Returns:
        pd.DataFrame: Datos con indicadores añadidos.
    """
    data['rsi'] = data['close'].diff().apply(lambda x: max(x, 0)).rolling(window=14).mean() / \
                   data['close'].diff().abs().rolling(window=14).mean()
    data['macd'] = data['close'].ewm(span=12).mean() - data['close'].ewm(span=26).mean()
    data['signal'] = data['macd'].ewm(span=9).mean()
    return data

# --------------------------- CREACIÓN DE MATRICES TEMPORALES ---------------------------
def create_time_matrix(data, lookback=30):
    """
    Crea una matriz temporal para análisis secuencial.

    Args:
        data (pd.DataFrame): Datos de entrada.
        lookback (int): Tamaño de la ventana temporal.

    Returns:
        np.ndarray: Matriz temporal.
    """
    sequences = []
    for i in range(len(data) - lookback):
        seq = data[i:i + lookback].values
        sequences.append(seq)
    return np.array(sequences)

# --------------------------- DEFINICIÓN DEL MODELO RESNET ---------------------------
def resnet_block(input_tensor, filters, kernel_size=3):
    """
    Define un bloque ResNet básico.

    Args:
        input_tensor: Tensor de entrada.
        filters (int): Número de filtros.
        kernel_size (int): Tamaño del kernel.

    Returns:
        Tensor: Tensor de salida.
    """
    x = Conv2D(filters, kernel_size, padding='same')(input_tensor)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters, kernel_size, padding='same')(x)
    x = BatchNormalization()(x)
    x = Add()([x, input_tensor])
    x = ReLU()(x)
    return x

def build_resnet(input_shape):
    """
    Construye el modelo ResNet.

    Args:
        input_shape (tuple): Forma de la entrada.

    Returns:
        Model: Modelo ResNet.
    """
    inputs = Input(shape=input_shape)
    x = Conv2D(64, 3, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    for _ in range(3):
        x = resnet_block(x, 64)

    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    return Model(inputs, x)

# --------------------------- ENTRENAMIENTO LIGHTGBM ---------------------------
def train_lightgbm(X_train, y_train, X_test, y_test):
    """
    Entrena un modelo LightGBM y evalúa su rendimiento.

    Args:
        X_train (pd.DataFrame): Conjunto de entrenamiento.
        y_train (pd.Series): Etiquetas de entrenamiento.
        X_test (pd.DataFrame): Conjunto de prueba.
        y_test (pd.Series): Etiquetas de prueba.

    Returns:
        Model: Modelo entrenado.
        dict: Resultados de validación cruzada.
    """
    params = {
        'objective': 'multiclass',
        'metric': 'multi_logloss',
        'boosting_type': 'gbdt',
        'learning_rate': 0.05,
        'num_leaves': 31,
        'num_class': len(np.unique(y_train))
    }

    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)

    cv_results = lgb.cv(
        params,
        lgb_train,
        num_boost_round=500,
        callbacks=[
            lgb.early_stopping(stopping_rounds=50),
            lgb.log_evaluation(10)
        ]
    )

    best_iter = len(cv_results['multi_logloss-mean'])
    model = lgb.train(params, lgb_train, num_boost_round=best_iter)
    return model, cv_results

# --------------------------- EVALUACIÓN Y MÉTRICAS ---------------------------
def evaluate_model(y_true, y_pred):
    """
    Calcula métricas de evaluación para un modelo.

    Args:
        y_true (array-like): Valores reales.
        y_pred (array-like): Valores predichos.

    Returns:
        dict: Métricas calculadas.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    return {'MAE': mae, 'MSE': mse, 'RMSE': rmse}

# --------------------------- FLUJO PRINCIPAL ---------------------------
if __name__ == "__main__":
    # Ruta del archivo (reemplazar con la correcta en Google Colab)
    file_path = '/content/EURUSD_hourly_data_with_sessions.csv'

    try:
        # Carga y preprocesamiento
        data = load_and_preprocess_data(file_path)
        data['denoised_close'] = denoise_wavelet(data['close'].values)
        data = calculate_technical_indicators(data)

        # Crear matriz temporal
        matrix_data = create_time_matrix(data[['denoised_close', 'rsi', 'macd', 'signal']], lookback=30)
        matrix_data = np.expand_dims(matrix_data, axis=-1)

        # Modelo ResNet
        resnet_model = build_resnet(matrix_data.shape[1:])
        features = resnet_model.predict(matrix_data)

        # Simulación de datos para LightGBM
        labels = pd.cut(data['close'][30:], bins=5, labels=False)
        X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
        model, cv_results = train_lightgbm(X_train, y_train, X_test, y_test)

        # Predicciones y evaluación
        y_pred = model.predict(X_test)
        y_pred_classes = np.argmax(y_pred, axis=1)
        metrics = evaluate_model(y_test, y_pred_classes)

        print(f"Métricas finales: {metrics}")

    except Exception as e:
        print(f"Error en la ejecución: {e}")


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 48ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.156357 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 132136
[LightGBM] [Info] Number of data points in the train set: 12780, number of used features: 526
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.165183 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 132136
[LightGBM] [Info] Number of data points in the train set: 12781, number of used features: 526
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.192621 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 132136
[LightGBM] [Info] Number of data points in the train set: 12781, number of used features: 526
[LightGBM] [Info] Auto-choosing col-wise multi-