Importando as bibliotecas

In [1]:
import pandas as pd
import numpy as np
import statistics  as sts
from datetime import datetime
from datetime import timedelta
from bcb import currency
from bcb import sgs

Definindo função para tratamento dos dados

In [2]:
def tratar_dados(df: pd.DataFrame) -> None:
    """
    Clean a DataFrame in place: drop duplicate rows, then impute missing numbers.

    Duplicate rows are removed first; surviving rows keep their original
    index labels, so the index may end up with gaps. Afterwards, every
    numeric column that contains missing values has them replaced by that
    column's median. Non-numeric columns (dates, strings, ...) are left
    untouched: a median is undefined for text and would raise, and filling a
    missing date with a "median date" would fabricate an observation.

    Args:
        df (pd.DataFrame): The DataFrame to clean. It is modified in place.

    Returns:
        None
    """

    # Drop fully duplicated rows, mutating the caller's frame.
    df.drop_duplicates(inplace=True)

    # Fill missing values with the per-column median, numeric columns only.
    for coluna in df.columns:
        serie = df[coluna]
        if not pd.api.types.is_numeric_dtype(serie):
            # Skip dates/text: no meaningful median to impute with.
            continue
        if not serie.isna().any():
            # Nothing to fill; avoid computing a median for no reason.
            continue
        df[coluna] = serie.fillna(serie.median())

Configurando data atual

In [3]:
# Today's date as a "YYYY-MM-DD" string; it is the `end` bound of every query below.
# strftime already returns a str, so no extra conversion is needed.
data_atual = datetime.now().strftime("%Y-%m-%d")
data_atual

'2023-09-11'

Obtendo a série temporal da taxa de câmbio do dólar pela API do Bacen

In [4]:
# USD exchange-rate history from 1994-07-01 through today.
# `side` selects the quote: bid = buy rate (compra), ask = sell rate (venda),
# both = buy and sell.
# NOTE(review): the saved outputs of the following cells list both `bid` and
# `ask` columns, which side='ask' alone would not produce — presumably they
# came from a run with side='both'; re-run the notebook to confirm.
dolar = currency.get(
    ['USD'],
    start='1994-07-01',
    end=data_atual,
    side='ask',
)
dolar

Unnamed: 0_level_0,USD
Date,Unnamed: 1_level_1
1994-07-01,1.0000
1994-07-01,1.0000
1994-07-04,0.9400
1994-07-04,0.9400
1994-07-05,0.9350
...,...
2023-09-04,4.9176
2023-09-05,4.9705
2023-09-06,4.9762
2023-09-08,4.9835


Movendo as datas do índice do histórico de cotação do Dólar para uma coluna

In [5]:
# Move the date index into a regular "Date" column; the frame gets a fresh
# positional index. Rebinding the name instead of using inplace=True.
dolar = dolar.reset_index()
dolar

Unnamed: 0_level_0,Date,USD,USD
Unnamed: 0_level_1,Unnamed: 1_level_1,bid,ask
0,1994-07-01,0.9000,1.0000
1,1994-07-01,0.9000,1.0000
2,1994-07-04,0.9350,0.9400
3,1994-07-04,0.9350,0.9400
4,1994-07-05,0.9300,0.9350
...,...,...,...
8469,2023-09-01,4.9312,4.9318
8470,2023-09-04,4.9170,4.9176
8471,2023-09-05,4.9699,4.9705
8472,2023-09-06,4.9756,4.9762


Tratando dados do histórico de cotação do Dólar 

In [6]:
# tratar_dados mutates the frame in place and returns None, so it is called
# as a plain statement; the frame is then displayed to show the result.
# Duplicate rows are dropped without renumbering, hence gaps in the index.
tratar_dados(dolar)
dolar

Unnamed: 0_level_0,Date,USD,USD
Unnamed: 0_level_1,Unnamed: 1_level_1,bid,ask
0,1994-07-01,0.9000,1.0000
2,1994-07-04,0.9350,0.9400
4,1994-07-05,0.9300,0.9350
5,1994-07-05,0.9270,0.9320
6,1994-07-06,0.9250,0.9300
...,...,...,...
8469,2023-09-01,4.9312,4.9318
8470,2023-09-04,4.9170,4.9176
8471,2023-09-05,4.9699,4.9705
8472,2023-09-06,4.9756,4.9762


Obtendo a série temporal do IPCA pela API do Bacen

In [7]:
# Fetch SGS series 433, labelled 'ipca', from 1994-07-01 through today.
ipca = sgs.get(
    {'ipca': 433},
    start='1994-07-01',
    end=data_atual,
)
ipca

Unnamed: 0_level_0,ipca
Date,Unnamed: 1_level_1
1994-07-01,6.84
1994-08-01,1.86
1994-09-01,1.53
1994-10-01,2.62
1994-11-01,2.81
...,...
2023-03-01,0.71
2023-04-01,0.61
2023-05-01,0.23
2023-06-01,-0.08


Movendo as datas do índice do histórico de variação do IPCA para uma coluna

In [8]:
# Move the date index into a regular "Date" column; the frame gets a fresh
# positional index. Rebinding the name instead of using inplace=True.
ipca = ipca.reset_index()
ipca

Unnamed: 0,Date,ipca
0,1994-07-01,6.84
1,1994-08-01,1.86
2,1994-09-01,1.53
3,1994-10-01,2.62
4,1994-11-01,2.81
...,...,...
344,2023-03-01,0.71
345,2023-04-01,0.61
346,2023-05-01,0.23
347,2023-06-01,-0.08


Tratando dados do histórico de variação do IPCA

In [16]:
# tratar_dados mutates the frame in place and returns None, so it is called
# as a plain statement; the frame is then displayed to show the result.
tratar_dados(ipca)
ipca

Unnamed: 0,Date,ipca
0,1994-07-01,6.84
1,1994-08-01,1.86
2,1994-09-01,1.53
3,1994-10-01,2.62
4,1994-11-01,2.81
...,...,...
344,2023-03-01,0.71
345,2023-04-01,0.61
346,2023-05-01,0.23
347,2023-06-01,-0.08


Obtendo a série temporal do IGP-M pela API do Bacen

In [17]:
# Fetch SGS series 189, labelled 'igp-m', from 1994-07-01 through today.
igpm = sgs.get(
    {'igp-m': 189},
    start='1994-07-01',
    end=data_atual,
)
igpm

Unnamed: 0_level_0,igp-m
Date,Unnamed: 1_level_1
1994-07-01,4.33
1994-08-01,3.94
1994-09-01,1.75
1994-10-01,1.82
1994-11-01,2.85
...,...
2023-04-01,-0.95
2023-05-01,-1.84
2023-06-01,-1.93
2023-07-01,-0.72


Movendo as datas do índice do histórico de variação do IGP-M para uma coluna

In [11]:
# Move the date index into a regular "Date" column; the frame gets a fresh
# positional index. Rebinding the name instead of using inplace=True.
igpm = igpm.reset_index()
igpm

Unnamed: 0,Date,igp-m
0,1994-07-01,4.33
1,1994-08-01,3.94
2,1994-09-01,1.75
3,1994-10-01,1.82
4,1994-11-01,2.85
...,...,...
345,2023-04-01,-0.95
346,2023-05-01,-1.84
347,2023-06-01,-1.93
348,2023-07-01,-0.72


Tratando dados do histórico de variação do IGP-M

In [12]:
# tratar_dados mutates the frame in place and returns None, so it is called
# as a plain statement; the frame is then displayed to show the result.
tratar_dados(igpm)
igpm

Unnamed: 0,Date,igp-m
0,1994-07-01,4.33
1,1994-08-01,3.94
2,1994-09-01,1.75
3,1994-10-01,1.82
4,1994-11-01,2.85
...,...,...
345,2023-04-01,-0.95
346,2023-05-01,-1.84
347,2023-06-01,-1.93
348,2023-07-01,-0.72


Obtendo a série temporal da Selic pela API do Bacen

In [13]:
# Fetch SGS series 432, labelled 'selic', requested from 1994-07-01 through
# today (the saved output shows the returned data starting on 1999-03-05).
selic = sgs.get(
    {'selic':432},
    start='1994-07-01',
    end=data_atual,
)
selic

Unnamed: 0_level_0,selic
Date,Unnamed: 1_level_1
1999-03-05,45.00
1999-03-06,45.00
1999-03-07,45.00
1999-03-08,45.00
1999-03-09,45.00
...,...
2023-09-07,13.25
2023-09-08,13.25
2023-09-09,13.25
2023-09-10,13.25


Movendo as datas do índice do histórico de variação da Selic para uma coluna

In [14]:
# Move the date index into a regular "Date" column; the frame gets a fresh
# positional index. Rebinding the name instead of using inplace=True.
selic = selic.reset_index()
selic

Unnamed: 0,Date,selic
0,1999-03-05,45.00
1,1999-03-06,45.00
2,1999-03-07,45.00
3,1999-03-08,45.00
4,1999-03-09,45.00
...,...,...
8952,2023-09-07,13.25
8953,2023-09-08,13.25
8954,2023-09-09,13.25
8955,2023-09-10,13.25


Tratando dados do histórico de variação da Selic

In [15]:
# tratar_dados mutates the frame in place and returns None, so it is called
# as a plain statement; the frame is then displayed to show the result.
tratar_dados(selic)
selic

Unnamed: 0,Date,selic
0,1999-03-05,45.00
1,1999-03-06,45.00
2,1999-03-07,45.00
3,1999-03-08,45.00
4,1999-03-09,45.00
...,...,...
8952,2023-09-07,13.25
8953,2023-09-08,13.25
8954,2023-09-09,13.25
8955,2023-09-10,13.25
