In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Configuration: output folder for generated datasets, input folder holding
# the raw spreadsheets, and the file name of the raw USD quotes spreadsheet.
pasta_datasets = '../Datasets/'
pasta_planilhas = '../Planilhas/'
planilha_dolar_original = 'planilha_dolar_bacen.csv'

# Create the output folder; an already-existing folder is expected on re-runs
# and is only reported. (The messages previously started with a stray,
# unbalanced double quote, which has been removed.)
try:
    os.mkdir(pasta_datasets)
    print(f'A pasta {pasta_datasets} foi criada.')
except FileExistsError:
    print(f'A pasta {pasta_datasets} já existe.')

"A pasta ../Datasets/ já existe.


In [3]:
# Check whether the raw USD quotes file (named by 'planilha_dolar_original')
# is present in the spreadsheets folder, and report the result.
planilha_dolar_existe = os.path.isfile(pasta_planilhas + planilha_dolar_original)

print(
    f'O arquivo {planilha_dolar_original} existe.'
    if planilha_dolar_existe
    else f'O arquivo {planilha_dolar_original} não existe, execute o Notebook anterior.'
)


O arquivo planilha_dolar_bacen.csv existe.


In [4]:
# Read the raw USD quotes file (named by 'planilha_dolar_original') into the
# dataframe 'df_dolar', parsing 'dataHoraCotacao' as a datetime column.
# NOTE(review): decimal=',' is the same character as sep=','; confirm that it
# matches the number format actually used in the file.
df_dolar = pd.read_csv(
    pasta_planilhas + planilha_dolar_original,
    sep=',',
    decimal=',',
    low_memory=False,
    parse_dates=['dataHoraCotacao'],
    dayfirst=True,
    encoding='utf-8',
)

# Show the loaded rows, then the column dtypes and non-null counts.
display(df_dolar)

df_dolar.info()


Unnamed: 0,cotacaoCompra,cotacaoVenda,dataHoraCotacao
0,2.6674,2.6682,2005-01-03 18:35:00.000
1,2.6879,2.6887,2005-01-04 17:40:00.000
2,2.7088,2.7096,2005-01-05 17:40:00.000
3,2.7199,2.7207,2005-01-06 17:49:00.000
4,2.7024,2.7032,2005-01-07 17:35:00.000
...,...,...,...
4263,5.6644,5.6650,2021-12-27 13:02:39.238
4264,5.6432,5.6438,2021-12-28 13:09:32.360
4265,5.6613,5.6619,2021-12-29 13:09:53.696
4266,5.5799,5.5805,2021-12-30 13:05:01.310


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4268 entries, 0 to 4267
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   cotacaoCompra    4268 non-null   float64       
 1   cotacaoVenda     4268 non-null   float64       
 2   dataHoraCotacao  4268 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(2)
memory usage: 100.2 KB


In [5]:
# Give the timestamp column the shorter name 'Data' (renamed in place).
colunas_renomeadas = {'dataHoraCotacao': 'Data'}
df_dolar.rename(colunas_renomeadas, axis='columns', inplace=True)

# Confirm the new column labels and the unchanged dtypes.
print(df_dolar.columns)
df_dolar.info()

Index(['cotacaoCompra', 'cotacaoVenda', 'Data'], dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4268 entries, 0 to 4267
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   cotacaoCompra  4268 non-null   float64       
 1   cotacaoVenda   4268 non-null   float64       
 2   Data           4268 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(2)
memory usage: 100.2 KB


In [6]:
# True if any cell of 'df_dolar' holds a missing value, False otherwise.

df_dolar.isna().to_numpy().any()

False

In [7]:
# Drop the time-of-day from 'Data', keeping only the calendar date
# (example: '2005-01-03 18:35:00.000' becomes '2005-01-03'). The column stays
# datetime64: normalize() sets every timestamp to midnight directly, which
# avoids the string round trip through strftime()/to_datetime() and the
# dayfirst flag that has no meaning for year-first strings.
df_dolar['Data'] = df_dolar['Data'].dt.normalize()

display(df_dolar)

Unnamed: 0,cotacaoCompra,cotacaoVenda,Data
0,2.6674,2.6682,2005-01-03
1,2.6879,2.6887,2005-01-04
2,2.7088,2.7096,2005-01-05
3,2.7199,2.7207,2005-01-06
4,2.7024,2.7032,2005-01-07
...,...,...,...
4263,5.6644,5.6650,2021-12-27
4264,5.6432,5.6438,2021-12-28
4265,5.6613,5.6619,2021-12-29
4266,5.5799,5.5805,2021-12-30


In [8]:
# Make the 'Data' column the index of 'df_dolar' (in place).
# Note: after this runs, 'Data' is no longer a regular column.
df_dolar.set_index('Data', inplace=True)

display(df_dolar)

Unnamed: 0_level_0,cotacaoCompra,cotacaoVenda
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-01-03,2.6674,2.6682
2005-01-04,2.6879,2.6887
2005-01-05,2.7088,2.7096
2005-01-06,2.7199,2.7207
2005-01-07,2.7024,2.7032
...,...,...
2021-12-27,5.6644,5.6650
2021-12-28,5.6432,5.6438
2021-12-29,5.6613,5.6619
2021-12-30,5.5799,5.5805


In [9]:
# Find the earliest and the latest date in the index of dataframe 'df_dolar'.
# (The messages previously named 'df_combustiveis', a dataframe that is not
# used in this notebook; they now name the dataframe actually inspected.)
menor_data = df_dolar.index.min()
maior_data = df_dolar.index.max()

print(f'Menor data no dataframe df_dolar: {menor_data!s}')
print(f'Maior data no dataframe df_dolar: {maior_data!s}')

Menor data no dataframe df_combustiveis: 2005-01-03 00:00:00
Maior data no dataframe df_combustiveis: 2021-12-31 00:00:00


In [10]:
# Write 'df_dolar' (index included) to 'dolar.csv' in the datasets folder,
# using ';' as the field separator.
df_dolar.to_csv(
    pasta_datasets + 'dolar.csv',
    sep=';',
    index=True,
)