In [1]:
# MVP Hello World: Coleta simples e modelo mínimo
import yfinance as yf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt
import tensorflow as tf
import joblib
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import shutil
    




In [2]:
# Desabilita a GPU para o TensorFlow não tentar usá-la e falhar
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [3]:
TICKER = 'PETR4.SA'  # Ou 'PETR4.SA' para Petrobras
START = '2018-01-01'
END = '2024-12-31'

In [4]:
def fetch_data(symbol=TICKER, start=START, end=END):
    """Coleta dados brutos."""
    df = yf.download(symbol, start=start, end=end)
    return df

def scaling_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)
    return scaled_data, scaler

def create_sequences(data, window=60):
    """Cria janelas deslizantes para LSTM."""
    X, y = [], []
    for i in range(len(data) - window):
        X.append(data[i:i+window])
        y.append(data[i+window])
    return np.array(X), np.array(y)

def preprocess_pipeline(df, window=60):
    scaled_data, scaler = scaling_data(df)    
    X, y = create_sequences(scaled_data, window=window)

    
    # Split treino/validação/teste (60/20/20)
    train_size = int(len(X) * 0.6)
    val_size = int(len(X) * 0.8)
    
    X_train, y_train = X[:train_size], y[:train_size]
    X_val, y_val = X[train_size:val_size], y[train_size:val_size]
    X_test, y_test = X[val_size:], y[val_size:]
    
    return X_train, y_train, X_val, y_val, X_test, y_test, scaler

def build_model(input_shape):
    model = Sequential()
    # Camada LSTM com retorno de sequência (para empilhar outra LSTM se quiser)
    model.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2)) # Evita Overfitting
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1)) # Saída: O preço previsto
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [5]:
print("Baixando dados...")
df = fetch_data(TICKER, START, END)
df.head(5)

Baixando dados...


Failed to get ticker 'PETR4.SA' reason: Expecting value: line 1 column 1 (char 0)
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['PETR4.SA']: Exception('%ticker%: No timezone found, symbol may be delisted')


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [6]:
df.shape

(0, 6)

In [7]:
print("Pré-processamento")
close = df[['Close']]
X_train, y_train, X_val, y_val, X_test, y_test, scaler = preprocess_pipeline(close)

Pré-processamento


ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by MinMaxScaler.

In [None]:
print(f"X_train: {X_train.shape}")
print(f"y_train: {y_train.shape}")
print(f"X_val: {X_val.shape}")
print(f"y_val: {y_val.shape}")
print(f"X_test: {X_test.shape}")
print(f"y_test: {y_test.shape}")

In [None]:
# Configura o experimento 
epochs = 40
batch_size = 32


# 1. LOGUE PARÂMETROS PRIMEIRO (Segurança)
# Se o treino falhar, pelo menos você sabe qual configuração causou o erro.


print("Treinando Modelo...")
# Construção do modelo (assumindo que build_model, X_train, etc. já existem)
model = build_model((X_train.shape[1], 1))

history = model.fit(
    X_train, y_train, 
    epochs=epochs, 
    batch_size=batch_size, 
    verbose=1, 
    validation_data=(X_val, y_val)
)

# 2. SALVAR ARTEFATOS (MODELO E SCALER)
print("Salvando artefatos...")

# Define o caminho
local_model_dir = "../modelos"
os.makedirs(local_model_dir, exist_ok=True) # <--- O SEGREDO ESTÁ AQUI
    
# Salva localmente
model_path = f"{local_model_dir}/lstm_mvp.keras"
model.save(model_path)

scaler_path = f"{local_model_dir}/scaler.pkl"
joblib.dump(scaler, scaler_path)

# 3. AVALIAÇÃO E MÉTRICAS
print("Avaliando...")
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)
y_test_inv = scaler.inverse_transform(y_test)

mae = mean_absolute_error(y_test_inv, predictions)
rmse = np.sqrt(mean_squared_error(y_test_inv, predictions))
# Correção para evitar divisão por zero no MAPE se houver valores 0
with np.errstate(divide='ignore', invalid='ignore'):
    mape = np.mean(np.abs((y_test_inv - predictions) / y_test_inv)) * 100
    if np.isnan(mape): mape = 0.0

metrics = {"mae": mae, "rmse": rmse, "mape": mape}

print(f"Métricas: {metrics}")

# 4. PLOTS (A ordem importa!)
print("Gerando gráficos...")

# -- Gráfico 1: Loss --
plt.figure(figsize=(12,6))
plt.plot(history.history['loss'], label='Treino')
plt.plot(history.history['val_loss'], label='Validação')
plt.title('Curva de Aprendizado')
plt.legend()
plt.show()

# -- Gráfico 2: Predição --
plt.figure(figsize=(12,6))
plt.plot(y_test_inv, label='Real')
plt.plot(predictions, label='Predito')
plt.title('Real vs Predito')
plt.legend() # O .legend() puxa os labels definidos no .plot acima
plt.show()

print("Processo finalizado com sucesso.")