# 2. Pipeline de Ingestao de Dados do Yahoo Finance

In [9]:
import pandas as pd
import numpy as np
import yfinance as yf

In [11]:
def fetch_data(ticker, start_date, end_date):
    "Baixar dados historicos do Yahoo Finance"
    data = yf.download(ticker, start=start_date, end=end_date, progress=False)
    return data

raw_data = fetch_data("PETR4.SA", "2010-01-01", "2024-10-31")
print(raw_data.head())

Price                      Adj Close      Close       High        Low  \
Ticker                      PETR4.SA   PETR4.SA   PETR4.SA   PETR4.SA   
Date                                                                    
2010-01-04 00:00:00+00:00   9.984550  37.320000  37.320000  36.820000   
2010-01-05 00:00:00+00:00   9.898937  37.000000  37.430000  36.799999   
2010-01-06 00:00:00+00:00  10.032708  37.500000  37.500000  36.799999   
2010-01-07 00:00:00+00:00   9.939072  37.150002  37.450001  37.070000   
2010-01-08 00:00:00+00:00   9.885560  36.950001  37.389999  36.860001   

Price                           Open    Volume  
Ticker                      PETR4.SA  PETR4.SA  
Date                                            
2010-01-04 00:00:00+00:00  36.950001  13303600  
2010-01-05 00:00:00+00:00  37.380001  21396400  
2010-01-06 00:00:00+00:00  36.799999  18720600  
2010-01-07 00:00:00+00:00  37.270000  10964600  
2010-01-08 00:00:00+00:00  37.160000  14624200  


# 2.1 Limpeza de dados

In [13]:
def clean_data(data):
    "Limpeza de dados do mercado com ajuste de dados ausentes e de valores extremos"
    # Limpar linhas com dados ausentes
    data = data.dropna()
    # Remover extremos no preco 'Close' usando z-score
    z_scores = np.abs((data['Close'] - data['Close'].mean()) / data['Close'].std())
    data = data[z_scores < 3]
    # Converter (date) index para datetime se ainda nao foi convertido
    data.index = pd.to_datetime(data.index)
    return data

cleaned_data = clean_data(raw_data)
print(cleaned_data.head())


Price                      Adj Close      Close       High        Low  \
Ticker                      PETR4.SA   PETR4.SA   PETR4.SA   PETR4.SA   
Date                                                                    
2010-01-04 00:00:00+00:00   9.984550  37.320000  37.320000  36.820000   
2010-01-05 00:00:00+00:00   9.898937  37.000000  37.430000  36.799999   
2010-01-06 00:00:00+00:00  10.032708  37.500000  37.500000  36.799999   
2010-01-07 00:00:00+00:00   9.939072  37.150002  37.450001  37.070000   
2010-01-08 00:00:00+00:00   9.885560  36.950001  37.389999  36.860001   

Price                           Open    Volume  
Ticker                      PETR4.SA  PETR4.SA  
Date                                            
2010-01-04 00:00:00+00:00  36.950001  13303600  
2010-01-05 00:00:00+00:00  37.380001  21396400  
2010-01-06 00:00:00+00:00  36.799999  18720600  
2010-01-07 00:00:00+00:00  37.270000  10964600  
2010-01-08 00:00:00+00:00  37.160000  14624200  


# 2.2 Armazenamento de dados

In [15]:
import sqlite3

In [17]:
def store_data(data, db_name, table_name):
    "Armazenar dados limpos em uma base de dados SQLite"
    # Conectar a base de dados SQLite
    conn = sqlite3.connect(db_name)
    # Armazenar dados em uma tabela
    data.to_sql(table_name, conn, if_exists='replace', index=True)
    # Fechar conexao
    conn.close()

store_data(cleaned_data, "financial_data.db", "PETR3.SA")
print("Dados armazenados com sucesso!")


Dados armazenados com sucesso!


# 2.3 Automatizar o pipeline de ingestao, limpeza e armazenamento de dados

In [19]:
def data_pipeline(ticker, start_date, end_date, db_name, table_name):
    "Completar o pipeline de aquisicao, limpeza e armazenamento de dados financeiros"
    #Step 1: Aquisicao de dados
    raw_data = fetch_data(ticker, start_date, end_date)
    #Step 2: Limpeza de dados
    cleaned_data = clean_data(raw_data)
    #Step 3: Armazenamento de dados
    store_data(cleaned_data, db_name, table_name)
    print(f"Pipeline completo para {ticker}!")

data_pipeline("PETR4.SA", "2010-01-01", "2024-10-31", "financial_data.db", "PETR4.SA")

Pipeline completo para PETR4.SA!


# 3. Calculo de Volatilidade Historica e Volatilidade Implicita

# 3.1 Volatilidade Historica

In [21]:
def calculate_historical_volatility(ticker, start_date, end_date, window=30):
    "Calcular a Volatilidade historica de uma determinada acao."
    # Aquisicao de dados historicos
    data = yf.download(ticker, start=start_date, end=end_date, progress=False)
    # Calculo de log de retorno diario
    data['Log_Returns'] = np.log(data['Close'] / data['Close'].shift(1))
    # Calculo de desvio padrao do log de retorno (anualizado)
    data['Historical_Volatility'] = data['Log_Returns'].rolling(window).std() * np.sqrt(252)
    return data[['Close', 'Log_Returns', 'Historical_Volatility']]

historical_vol_data = calculate_historical_volatility("PETR4.SA", "2010-01-01", "2024-10-31")
print(historical_vol_data.tail())

Price                          Close Log_Returns Historical_Volatility
Ticker                      PETR4.SA                                  
Date                                                                  
2024-10-24 00:00:00+00:00  35.900002    0.006708              0.186282
2024-10-25 00:00:00+00:00  36.150002    0.006940              0.187280
2024-10-28 00:00:00+00:00  36.090000   -0.001661              0.182314
2024-10-29 00:00:00+00:00  36.009998   -0.002219              0.182041
2024-10-30 00:00:00+00:00  35.849998   -0.004453              0.168468


# 3.2 Volatilidada Implicita

In [23]:
from scipy.stats import norm
from scipy.optimize import brentq

In [25]:
def black_scholes(S, K, T, r, sigma, option_type="call"):
    "Calcular o preco da opcao Black-Scholes"
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    if option_type == "call":
        return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    elif option_type == "put":
        return K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

def implied_volatility(S, K, T, r, market_price, option_type="call"):
    "Calcular a volatilidade implicita usando a formula de Black-Scholes"
    def objective_function(sigma):
        return black_scholes(S, K, T, r, sigma, option_type) - market_price

    # Resolver para a volatilidade implicita
    try:
        iv = brentq(objective_function, 1e-5, 5) # Limites iniciais de volatilidade
        return iv
    except ValueError:
        return np.nan

S = 38 # Valor da acao atual
K = 43 # Preco de Strike
T = 0.5 # Tempo para o vencimento (em anos)
r = 0.03 # Taxa livre de risco
market_price = 0.5 # Preco da opcao

implied_vol = implied_volatility(S, K, T, r, market_price, option_type="call")
print(f"Volatilidade Implicita: {implied_vol:.2%}")


Volatilidade Implicita: 17.26%


# 3.3 Combinar ambas as volatilidades

In [27]:
def calculate_volatility(ticker, start_date, end_date, option_data):
    "Calcular volatilidade historica e implicita"
    # Volatilidade Historica
    historical_data = calculate_historical_volatility(ticker, start_date, end_date)
    # Volatilidade Implicita para cada opcao
    implied_vols = []
    for option in option_data:
        S = option['S']
        K = option['K']
        T = option['T']
        r = option['r']
        market_price = option['market_price']
        iv = implied_volatility(S, K, T, r, market_price, option['type'])
        implied_vols.append(iv)
    return historical_data, implied_vols

# Dados das opcoes 
option_data = [
    {"S":150, "K":155, "T":0.5, "r":0.03, "market_price":5, "type": "call"},
    {"S":150, "K":145, "T":0.5, "r":0.03, "market_price":7, "type": "put"}
]

# Uso
historical_vol, implied_vol = calculate_volatility("PETR4.SA", "2010-01-01", "2024-10-31", 
                                                   option_data)
print("Volatilidade Historica:")
print(historical_vol.tail())
print("\nVolatilidade Implicita:")
print(implied_vol)



Volatilidade Historica:
Price                          Close Log_Returns Historical_Volatility
Ticker                      PETR4.SA                                  
Date                                                                  
2024-10-24 00:00:00+00:00  35.900002    0.006708              0.186282
2024-10-25 00:00:00+00:00  36.150002    0.006940              0.187280
2024-10-28 00:00:00+00:00  36.090000   -0.001661              0.182314
2024-10-29 00:00:00+00:00  36.009998   -0.002219              0.182041
2024-10-30 00:00:00+00:00  35.849998   -0.004453              0.168468

Volatilidade Implicita:
[0.14655776873586115, 0.24685930176474008]


# 4. Testar Estrutura de Dados em Tempo Real

In [29]:
import pandas as pd
import numpy as np
import time
from datetime import datetime
import yfinance as yf

In [31]:
def fetch_real_time_data(ticker):
    "Importar dados de acoes em tempo real do Yahoo Finance"
    try:
        data = yf.download(ticker, period="1d", interval="1m", progress=False)
        if data.empty:
            print(f"Sem dados importados para a acao {ticker}")
        return data
    except Exceptions as e:
        print(f"Error fetching data: {e}")
        return None


def validate_data_integrity(data):
    "Validar a integridade dos dados importados."
    "- Verificar dados ausentes"
    "- Confirmar que os timestamps sao consistentes"
    "- Validar tipos de dados"
    if data is None or data.empty:
         return {"status": "Falha", "issue": "Sem dados importados ou dados ausentes"}

    issues = []
    #Checar valores ausentes
    if data.isnull().any().any():
        issues.append("Dados contem valores ausentes")

    #Validar consistencia dos timestamps
    if not pd.to_datetime(data.index).is_monotonic_increasing:
        issues.append("Timestamps nao estao aumentando em ordem.")

    #Validar tipos de dados nas colunas
    expected_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    if not all(col in data.columns for col in expected_columns):
        issues.append("Coluna esperada ausente nos dados")

    if issues:
        return {"status": "Falha", "issues": issues}
    return {"status": "Sucesso", "issues": None}

def test_real_time_data(ticker, duration=60, interval=10):
    "Testar a atualizacao de dados em tempo real"
    "- Importar dados em intervalos regulares"
    "- Validar a integridade dos dados a cada passo"
    start_time = datetime.now()
    end_time = start_time + pd.Timedelta(seconds=duration)
    while datetime.now() < end_time:
        print(f"\nImportando dados de {ticker} em {datetime.now()}...")
        data = fetch_real_time_data(ticker)

        #Validar integridade dos dados
        validation_result = validate_data_integrity(data)
        if validation_result["status"] == "Sucesso":
            print(f"Integridade dos dados: APROVADA")
        else:
            print(f"Integridade dos dados: REPROVADA - Problemas: {validation_result['issues']
            }")

        #Pausa por intervalo definido
        time.sleep(interval)

    print("\nTeste em tempo real finalizado.")


test_real_time_data("PETR4.SA", duration=60, interval=10)


Importando dados de PETR4.SA em 2024-11-25 21:39:31.245875...
Integridade dos dados: APROVADA

Importando dados de PETR4.SA em 2024-11-25 21:39:41.888161...
Integridade dos dados: APROVADA

Importando dados de PETR4.SA em 2024-11-25 21:39:52.332960...
Integridade dos dados: APROVADA

Importando dados de PETR4.SA em 2024-11-25 21:40:02.770916...
Integridade dos dados: APROVADA

Importando dados de PETR4.SA em 2024-11-25 21:40:13.006313...
Integridade dos dados: APROVADA

Importando dados de PETR4.SA em 2024-11-25 21:40:23.323007...
Integridade dos dados: APROVADA

Teste em tempo real finalizado.


In [33]:
ticker = "PETR4.SA"
fetch_real_time_data(ticker)

Price,Adj Close,Close,High,Low,Open,Volume
Ticker,PETR4.SA,PETR4.SA,PETR4.SA,PETR4.SA,PETR4.SA,PETR4.SA
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2024-11-25 13:03:00+00:00,39.349998,39.349998,39.400002,39.299999,39.380001,0
2024-11-25 13:04:00+00:00,39.419998,39.419998,39.419998,39.349998,39.369999,100300
2024-11-25 13:05:00+00:00,39.389999,39.389999,39.419998,39.360001,39.419998,84100
2024-11-25 13:06:00+00:00,39.410000,39.410000,39.419998,39.389999,39.400002,60200
2024-11-25 13:07:00+00:00,39.369999,39.369999,39.419998,39.360001,39.419998,55100
...,...,...,...,...,...,...
2024-11-25 19:55:00+00:00,39.270000,39.270000,39.330002,39.250000,39.259998,282700
2024-11-25 19:56:00+00:00,39.290001,39.290001,39.320000,39.259998,39.270000,29500
2024-11-25 19:57:00+00:00,39.310001,39.310001,39.310001,39.290001,39.310001,73400
2024-11-25 19:58:00+00:00,39.270000,39.270000,39.310001,39.270000,39.290001,21600


# 5. Desenvolvimento de algoritmo de trading baseado em volatilidade

In [35]:
def calculate_volatility(ticker, start_date, end_date, option_data):
    "Calcular volatilidade historica e implicita"
    # Volatilidade Historica
    historical_data = calculate_historical_volatility(ticker, start_date, end_date)
    # Volatilidade Implicita para cada opcao
    implied_vols = []
    for option in option_data:
        S = option['S']
        K = option['K']
        T = option['T']
        r = option['r']
        market_price = option['market_price']
        iv = implied_volatility(S, K, T, r, market_price, option['type'])
        implied_vols.append(iv)
    return historical_data, implied_vols

# Dados das opcoes 
option_data = [
    {"S":39, "K":43, "T":0.5, "r":0.03, "market_price":5, "type": "call"}
]

# Uso
historical_vol, implied_vol = calculate_volatility("PETR4.SA", "2010-01-01", "2024-10-31", 
                                                   option_data)
print("Volatilidade Historica:")
print(historical_vol.tail())
print("\nVolatilidade Implicita:")
print(implied_vol)

iv = pd.DataFrame(implied_vol)
hv = pd.DataFrame(historical_vol)
historical_volatility = hv.Historical_Volatility
hv_value = historical_volatility.iloc[-1]
print("Volatilidade Historica:")
print(hv_value)

# Detectar oportunidades na variacao de preco
if implied_vol > hv_value:
    print("-> Volatilidade implicita esta em sobrepreco, considere vender volatilidade (ex. iron condor).")
else:
    print("-> Volatilidade implicita esta em sobpreco, considere comprar volatilidade (ex. long straddle).")


Volatilidade Historica:
Price                          Close Log_Returns Historical_Volatility
Ticker                      PETR4.SA                                  
Date                                                                  
2024-10-24 00:00:00+00:00  35.900002    0.006708              0.186282
2024-10-25 00:00:00+00:00  36.150002    0.006940              0.187280
2024-10-28 00:00:00+00:00  36.090000   -0.001661              0.182314
2024-10-29 00:00:00+00:00  36.009998   -0.002219              0.182041
2024-10-30 00:00:00+00:00  35.849998   -0.004453              0.168468

Volatilidade Implicita:
[0.5744237091257847]
Volatilidade Historica:
0.16846764493723385
-> Volatilidade implicita esta em sobrepreco, considere vender volatilidade (ex. iron condor).


# 5.1 Machine Learning para previsao de volatilidade

In [37]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

df = pd.DataFrame(historical_vol) 
print(df.tail())
returns = df.Log_Returns
print(returns.tail())

#Preparar dados 
X = returns.values.reshape(-1,1)[1:] #Retornos pregressos como feature
y = returns.shift(-1).dropna().values #Retorno do proximo dia como target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Treinar o modelo RandomForest
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

#Predizer retornos do proximo dia (para analise de volatilidade)
y_pred = model.predict(X_test)
print(f"Volatilidade esperada: {np.std(y_pred) * np.sqrt(252):.2%}")

Price                          Close Log_Returns Historical_Volatility
Ticker                      PETR4.SA                                  
Date                                                                  
2024-10-24 00:00:00+00:00  35.900002    0.006708              0.186282
2024-10-25 00:00:00+00:00  36.150002    0.006940              0.187280
2024-10-28 00:00:00+00:00  36.090000   -0.001661              0.182314
2024-10-29 00:00:00+00:00  36.009998   -0.002219              0.182041
2024-10-30 00:00:00+00:00  35.849998   -0.004453              0.168468
Date
2024-10-24 00:00:00+00:00    0.006708
2024-10-25 00:00:00+00:00    0.006940
2024-10-28 00:00:00+00:00   -0.001661
2024-10-29 00:00:00+00:00   -0.002219
2024-10-30 00:00:00+00:00   -0.004453
Name: Log_Returns, dtype: float64
Volatilidade esperada: 45.96%


# 6. Backtesting da estrategia em diferentes cenarios de mercado - VectorBT

# 7. Integracao com APIs externas 

# 8. Ajustes e otimizacao com base nos testes

# 9. Implementar interface de monitoramento

In [3]:
!pip install dash

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
   ---------------------------------------- 0.0/7.8 MB ? eta -:--:--
   -- ------------------------------------- 0.5/7.8 MB 2.4 MB/s eta 0:00:04
   ----- ---------------------------------- 1.0/7.8 MB 2.5 MB/s eta 0:00:03
   -------- ------------------------------- 1.6/7.8 MB 2.5 MB/s eta 0:00:03
   ---------- ----------------------------- 2.1/7.8 MB 2.6 MB/s eta 0:00:03
   ------------- -------------------------- 

In [39]:
from dash import Dash, dcc, html
import plotly.express as px

# Dados de volatilidade historica
fig = px.line(historical_volatility, x=historical_volatility.index, y='Historical_Volatility', 
              title='Volatilidade Historica')

# Inicializar o aplicativo Dash
app = Dash(__name__)

app.layout = html.Div([
    html.H1("Dashboard de Volatilidade"),
    dcc.Graph(figure=fig),
    dcc.Dropdown(
        id='ticker-dropdown',
        options=[{'label': 'PETR4.SA', 'value': 'PETR4.SA'}, {'label': 'VALE3.SA', 'value': 'VALE3.SA'}],
        value='PETR4.SA'
    )
])

if __name__ == '__main__':
    app.run_server(debug=True)

