
# Projeto Integrado — Deep Learning (CSV) — **Notebook Único (100% inline)**

Este notebook **mostra todos os resultados aqui dentro**, sem gerar arquivos externos.
- Treina **CNN 1D** e **LSTM** para **BBAS3SA, VALE3SA, PETR4SA, CSNA3SA** (pastas em `bases/`).
- Exibe **acurácia**, **matriz de confusão**, **precision/recall**, e **backtest** (se houver `close`) **inline**.
- Cria também um **comparativo** (CNN × LSTM) em tabela, com destaque do melhor modelo por ativo.

> Estrutura esperada:
```
bases/
  BBAS3SA/{treino.csv, teste.csv, visualizacao.html}
  VALE3SA/{treino.csv, teste.csv, visualizacao.html}
  PETR4SA/{treino.csv, teste.csv, visualizacao.html}
  CSNA3SA/{treino.csv, teste.csv, visualizacao.html}
```


In [None]:

# ================== SETUP ==================
import os, re, warnings
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore")

# TensorFlow / Keras
try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
except Exception as e:
    raise RuntimeError(
        "TensorFlow/Keras não está disponível neste ambiente. "
        "Instale com `pip install tensorflow` e reinicie o kernel.\n"
        f"Erro original: {e}"
    )

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_recall_fscore_support
from sklearn.preprocessing import StandardScaler
from IPython.display import display, Markdown

# Single seed applied to both NumPy and TensorFlow random number generators.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Root folder scanned by autodiscover_pairs for one sub-folder per asset.
DATA_ROOT = Path("bases")  # adjust if needed

# Candidate column names handed to find_col by prepare_xy to locate the
# label, date and closing-price columns of each CSV.
LABEL_ALIASES = ["label", "rotulo", "rótulo", "target", "y"]
DATE_ALIASES  = ["date", "data", "day", "dia"]
CLOSE_ALIASES = ["close", "fechamento", "close_price", "preco_fechamento", "preço_fechamento"]

# NOTE(review): WINDOW is not referenced anywhere in the visible code; the
# feature count of 15 used by prepare_xy is a separate literal. Presumably
# both are meant to be the same value -- confirm.
WINDOW = 15
# Passed to model.fit as `epochs` and `batch_size` in the execution cell.
EPOCHS = 25
BATCH  = 64


In [None]:

# ================== FUNÇÕES ==================
def find_col(cols, aliases):
    """Return the column in ``cols`` that best matches one of ``aliases``.

    Matching is case-insensitive and ignores whitespace around the column
    name. Candidates are ranked in three tiers so that a precise match
    always beats a loose one:

    1. the column name equals an alias;
    2. an alias equals one "word" of the column name, where words are
       separated by non-alphanumeric characters or underscores
       (``"Adj Close"``, ``"y_next"``);
    3. an alias of at least four characters occurs as a substring of the
       column name (``"closeprice"``).

    Short aliases are excluded from tier 3 because plain substring matching
    produces false positives for them (``"y"`` inside ``"volatility"``,
    ``"dia"`` inside ``"media_5"``).

    Within a tier, aliases are tried in the order given and columns in their
    original order.

    Args:
        cols: Iterable of column labels. Non-string labels are compared via
            their ``str()`` form.
        aliases: Iterable of alias strings, most preferred first.

    Returns:
        The matching element of ``cols`` (unmodified), or ``None`` when no
        column matches.
    """
    min_substring_len = 4

    wanted = [a.lower() for a in aliases if a]
    names = [(c, str(c).strip().lower()) for c in cols]

    # Tier 1: exact name.
    for alias in wanted:
        for col, name in names:
            if name == alias:
                return col

    # Tier 2: alias is a whole word of the column name.
    worded = [(col, set(re.split(r"[\W_]+", name))) for col, name in names]
    for alias in wanted:
        for col, words in worded:
            if alias in words:
                return col

    # Tier 3: substring, restricted to aliases long enough to be distinctive.
    for alias in wanted:
        if len(alias) < min_substring_len:
            continue
        for col, name in names:
            if alias in name:
                return col

    return None

def autodiscover_pairs(root: Path):
    """Find asset folders under ``root`` that hold both CSV splits.

    A sub-folder qualifies when it directly contains the files
    ``treino.csv`` and ``teste.csv``. Folders are visited in sorted order.

    Args:
        root: Directory to scan (a ``Path`` or a path string).

    Returns:
        Dict mapping the upper-cased folder name to
        ``{"train": Path, "test": Path}``. Empty when ``root`` does not
        exist, is not a directory, or has no qualifying sub-folder, so the
        caller can report "nothing found" instead of hitting a
        ``FileNotFoundError`` from ``iterdir``.
    """
    root = Path(root)
    if not root.is_dir():
        return {}

    pairs = {}
    for folder in sorted(p for p in root.iterdir() if p.is_dir()):
        train_csv = folder / "treino.csv"
        test_csv = folder / "teste.csv"
        # is_file() rather than exists(): a directory with that name is not a CSV.
        if train_csv.is_file() and test_csv.is_file():
            pairs[folder.name.upper()] = {"train": train_csv, "test": test_csv}
    return pairs

def load_pair(train_path: Path, test_path: Path):
    """Read the training and test CSV files.

    Args:
        train_path: Location of the training CSV.
        test_path: Location of the test CSV.

    Returns:
        Tuple ``(train_df, test_df)`` of DataFrames, read in that order with
        pandas' default CSV options.
    """
    return tuple(pd.read_csv(csv_path) for csv_path in (train_path, test_path))

def prepare_xy(df: pd.DataFrame, scaler=None, window: int = 15):
    """Turn a raw DataFrame into a standardized 3-D feature array and labels.

    The label, date and close columns are located with ``find_col`` using
    the module-level alias lists. Features are the last ``window`` numeric
    columns other than the label column.

    Args:
        df: Input data with a label column and at least ``window`` other
            numeric columns.
        scaler: Optional already-fitted scaler exposing ``transform``. When
            given, it is applied as-is, which lets the test split be scaled
            with the statistics learned on the training split. When ``None``
            (the default, same as the previous behaviour) a new
            ``StandardScaler`` is fitted on ``df`` itself.
        window: Number of trailing numeric columns used as features.

    Returns:
        Tuple ``(X3, y, meta)`` where ``X3`` has shape
        ``(n_rows, window, 1)``, ``y`` is the label column cast to ``int``,
        and ``meta`` is a dict with the detected ``"label"``, ``"date"`` and
        ``"close"`` column names (``None`` when not found), the selected
        ``"features"`` and the ``"scaler"`` that was used.

    Raises:
        ValueError: If ``window`` is smaller than 1, no label column can be
            detected, or fewer than ``window`` numeric feature columns exist.
    """
    if window < 1:
        # feat_cols[-0:] would silently select every column.
        raise ValueError(f"window must be >= 1, got {window}")

    cols = list(df.columns)
    label_col = find_col(cols, LABEL_ALIASES)
    date_col  = find_col(cols, DATE_ALIASES)
    close_col = find_col(cols, CLOSE_ALIASES)
    if label_col is None:
        raise ValueError("Não foi possível detectar a coluna de rótulo (use label/rotulo/target/y).")

    feat_cols = [c for c in cols if c != label_col and pd.api.types.is_numeric_dtype(df[c])]
    if len(feat_cols) < window:
        raise ValueError(f"Menos de {window} features numéricas encontradas. Colunas numéricas: {feat_cols}")
    feat_cols = feat_cols[-window:]

    X = df[feat_cols].values
    y = df[label_col].astype(int).values

    if scaler is None:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
    else:
        # Reuse previously learned statistics; do not refit on this split.
        X = scaler.transform(X)

    # Add a trailing channel axis: (rows, features) -> (rows, features, 1).
    X3 = X.reshape(X.shape[0], X.shape[1], 1)
    meta = {
        "label": label_col,
        "date": date_col,
        "close": close_col,
        "features": feat_cols,
        "scaler": scaler,
    }
    return X3, y, meta

def build_cnn1d(input_shape):
    """Build and compile a small 1-D convolutional regressor.

    Architecture: two same-padded Conv1D layers (32 filters, kernel 3, ReLU),
    global average pooling, a 32-unit ReLU dense layer and a single tanh
    output unit, so predictions lie in [-1, 1]. Compiled with Adam
    (learning rate 1e-3) and mean squared error loss.

    Args:
        input_shape: Shape of one sample, without the batch dimension.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    stack = [
        layers.Input(shape=input_shape),
        layers.Conv1D(filters=32, kernel_size=3, padding="same", activation="relu"),
        layers.Conv1D(filters=32, kernel_size=3, padding="same", activation="relu"),
        layers.GlobalAveragePooling1D(),
        layers.Dense(units=32, activation="relu"),
        layers.Dense(units=1, activation="tanh"),
    ]
    net = keras.Sequential(stack)
    optimizer = keras.optimizers.Adam(learning_rate=1e-3)
    net.compile(optimizer=optimizer, loss="mse")
    return net

def build_lstm(input_shape):
    """Build and compile a small LSTM regressor.

    Architecture: one 32-unit LSTM layer, a 32-unit ReLU dense layer and a
    single tanh output unit, so predictions lie in [-1, 1]. Compiled with
    Adam (learning rate 1e-3) and mean squared error loss.

    Args:
        input_shape: Shape of one sample, without the batch dimension.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    stack = [
        layers.Input(shape=input_shape),
        layers.LSTM(units=32),
        layers.Dense(units=32, activation="relu"),
        layers.Dense(units=1, activation="tanh"),
    ]
    net = keras.Sequential(stack)
    optimizer = keras.optimizers.Adam(learning_rate=1e-3)
    net.compile(optimizer=optimizer, loss="mse")
    return net

def _cumulative_returns(close, positions):
    """Compute cumulative buy-and-hold and strategy return curves.

    The position at step ``t`` is applied to the price change from ``t`` to
    ``t + 1``. The final step has no next price, so its return is 0 for both
    curves. A previous close of exactly 0 is replaced by 1e-12 in the
    denominator to avoid division by zero.

    Args:
        close: 1-D float array of closing prices.
        positions: 1-D array of positions, same length as ``close``.

    Returns:
        Tuple ``(cum_buy_hold, cum_strategy)`` of arrays shaped like ``close``.
    """
    ret_next = np.zeros_like(close, dtype=float)
    prev = close[:-1]
    ret_next[:-1] = (close[1:] - prev) / np.where(prev == 0, 1e-12, prev)

    pos = np.asarray(positions).astype(float)
    strat_ret = np.zeros_like(ret_next)
    strat_ret[:-1] = pos[:-1] * ret_next[:-1]

    return np.cumprod(1 + ret_next) - 1, np.cumprod(1 + strat_ret) - 1


def evaluate_and_show(model, Xte, yte, ticker, model_name, close_series=None):
    """Evaluate a fitted model on the test split and render the results inline.

    Continuous predictions are thresholded at 0 into the classes -1 and +1.
    Accuracy, the confusion matrix, per-class precision/recall/F1 and a text
    classification report are displayed. When ``close_series`` is given, a
    cumulative-return plot comparing buy-and-hold with the model's signals is
    drawn as well.

    Args:
        model: Object whose ``predict(Xte, verbose=0)`` returns one score per
            row of ``Xte``.
        Xte: Test features passed straight to ``model.predict``.
        yte: True labels for the test rows, expected to be -1 or +1.
        ticker: Asset name used in headings and in the returned dict.
        model_name: Model label used in headings and in the returned dict.
        close_series: Optional pandas Series of closing prices aligned
            row-by-row with ``Xte``; enables the backtest plot.

    Returns:
        Dict with ``"Ticker"``, ``"Modelo"``, ``"Accuracy"``, per-class
        precision and recall, and ``"preds_df"`` (DataFrame with ``y_true``
        and ``y_pred`` columns).
    """
    class_labels = [-1, 1]

    y_pred_cont = model.predict(Xte, verbose=0).ravel()
    y_pred = np.where(y_pred_cont >= 0, 1, -1)

    acc = accuracy_score(yte, y_pred)
    cm  = confusion_matrix(yte, y_pred, labels=class_labels)
    pre, rec, f1, sup = precision_recall_fscore_support(
        yte, y_pred, labels=class_labels, zero_division=0
    )
    # labels= must be explicit: otherwise the report infers the classes from
    # the data and raises when that count differs from the two target_names
    # (e.g. a test split where only one class is present and predicted).
    report = classification_report(
        yte,
        y_pred,
        labels=class_labels,
        target_names=["SELL(-1)", "BUY(+1)"],
        digits=4,
        zero_division=0,
    )

    display(Markdown(f"## {ticker} — {model_name}"))
    display(Markdown(f"**Acurácia (teste):** {acc:.4f}"))

    cm_df = pd.DataFrame(cm, index=["Verdade -1","Verdade +1"], columns=["Pred -1","Pred +1"])
    display(Markdown("**Matriz de confusão:**")); display(cm_df)

    pr_df = pd.DataFrame({
        "Classe": ["-1"," +1"],
        "Precision": [pre[0], pre[1]],
        "Recall": [rec[0], rec[1]],
        "F1": [f1[0], f1[1]],
        "Suporte": [sup[0], sup[1]],
    })
    display(Markdown("**Precision/Recall/F1:**")); display(pr_df)

    display(Markdown("**Classification report (texto):**")); print(report)

    # Inline backtest, only when closing prices are available.
    if close_series is not None:
        close = close_series.astype(float).values
        cum_bh, cum_md = _cumulative_returns(close, y_pred)
        plt.figure(figsize=(6,4))
        plt.plot(cum_bh, label="Buy&Hold (aprox.)")
        plt.plot(cum_md, label=f"Estratégia {model_name}")
        plt.title(f"Backtest (curva acumulada) — {ticker}")
        plt.xlabel("Tempo"); plt.ylabel("Retorno acumulado"); plt.legend(); plt.show()

    return {
        "Ticker": ticker, "Modelo": model_name, "Accuracy": acc,
        "Precision(-1)": pre[0], "Recall(-1)": rec[0],
        "Precision(+1)": pre[1], "Recall(+1)": rec[1],
        "preds_df": pd.DataFrame({"y_true": yte, "y_pred": y_pred})
    }


In [None]:

# ================== EXECUTION (inline) ==================
# Discover every asset folder that has both CSV splits; stop when none exist.
pairs = autodiscover_pairs(DATA_ROOT)
if not pairs:
    raise SystemExit(f"Nenhuma pasta encontrada em {DATA_ROOT.resolve()} com treino.csv/teste.csv.")

display(Markdown("### Ativos detectados:"))
for t, p in pairs.items():
    train_name, test_name = p["train"].name, p["test"].name
    display(Markdown(f"- **{t}**: {train_name} / {test_name}"))

rows = []          # one metrics dict per (ticker, model)
preds_store = {}   # ticker -> model name -> DataFrame of y_true / y_pred

# Training options shared by both models.
fit_options = dict(validation_split=0.1, epochs=EPOCHS, batch_size=BATCH, verbose=0)

for ticker, paths in pairs.items():
    df_tr, df_te = load_pair(paths["train"], paths["test"])
    # NOTE(review): prepare_xy is called independently per split, so train
    # and test are each standardized with their own statistics.
    Xtr, ytr, meta_tr = prepare_xy(df_tr)
    Xte, yte, meta_te = prepare_xy(df_te)

    sample_shape = Xtr.shape[1:]
    # Closing prices of the test split drive the optional backtest plot.
    close_te = df_te[meta_te["close"]] if meta_te["close"] else None

    # CNN 1D
    cnn = build_cnn1d(sample_shape)
    _ = cnn.fit(Xtr, ytr, **fit_options)
    r1 = evaluate_and_show(cnn, Xte, yte, ticker, "CNN1D", close_series=close_te)
    rows.append({key: val for key, val in r1.items() if key != "preds_df"})
    preds_store[ticker] = {"CNN1D": r1["preds_df"]}

    # LSTM
    lstm = build_lstm(sample_shape)
    _ = lstm.fit(Xtr, ytr, **fit_options)
    r2 = evaluate_and_show(lstm, Xte, yte, ticker, "LSTM", close_series=close_te)
    rows.append({key: val for key, val in r2.items() if key != "preds_df"})
    preds_store[ticker]["LSTM"] = r2["preds_df"]

# Consolidated metrics table, ordered by ticker then model.
res_df = pd.DataFrame(rows)
res_df = res_df.sort_values(["Ticker","Modelo"])
display(Markdown("## Consolidado — Métricas por ativo/modelo"))
display(res_df)

# Accuracy comparison: one row per ticker, one column per model, with the
# best value of each row highlighted.
pivot = res_df.pivot(index="Ticker", columns="Modelo", values="Accuracy")
display(Markdown("## Comparativo de Acurácia (melhor por linha em destaque)"))
styled_pivot = pivot.style.highlight_max(axis=1)
display(styled_pivot)

# Se quiser ver as predições de um par específico, descomente o exemplo:
# preds_store["BBAS3SA"]["CNN1D"].head()
