# Preparing external files for import into Tableau

## File(s):
`ibge_2019.csv` - original filepath: https://www.ibge.gov.br/cidades-e-estados

## Purpose:
Clean the data so that it can be imported into Tableau to finish the visualisation

## Strategy:
Replace the full state names in the `FU [-]` column with their two-letter abbreviations and rename the column to `UF`, so that it matches the `UF` column in the primary data source and the two sources can be blended on Tableau

## Output:
`.csv` file (`ibge_2019_clean.csv`) to import into Tableau



In [1]:
import pandas as pd

In [28]:
# header=2 makes pandas take the column labels from row 2 (0-based) of the
# file; the lines above that row are not loaded as data.
df = pd.read_csv('ibge_2019.csv', header=2)

In [29]:
# Display the table as loaded, to check the header row and the column values.
df

Unnamed: 0,FU [-],Code [-],Demonym [-],Governor [2019],Capital [2010],Area - km² [2019],Population Estimate - people [2020],Demographic density - inhab/km² [2010],Enrollment in primary education - enrollments [2018],HDI <span>Human Development Index</span> [2010],Realized revenue - R$ (×1000) [2017],Committed expenditure - R$ (×1000) [2017],Monthly household income per capita - R$ [2019],Total vehicles - vehicles [2018]
0,Acre,12,acriano,GLADSON DE LIMA CAMELI,Rio Branco,164123.964,894470,4.47,157646,0.663,6632883.0,6084417.0,890,277831
1,Alagoas,27,alagoano,JOSE RENAN VASCONCELOS CALHEIROS FILHO,Maceió,27843.295,3351543,112.33,490587,0.631,11950440.0,10460630.0,731,834827
2,Amapá,16,amapaense,ANTONIO WALDEZ GÓES DA SILVA,Macapá,142470.762,861773,4.69,136185,0.708,5396417.0,4224464.0,880,195039
3,Amazonas,13,amazonense,WILSON MIRANDA LIMA,Manaus,1559167.889,4207714,2.23,705007,0.674,17328460.0,15324900.0,842,883083
4,Bahia,29,baiano,RUI COSTA DOS SANTOS,Salvador,564760.427,14930634,24.82,2034711,0.66,50191000.0,45570160.0,913,4139107
5,Ceará,23,cearense,CAMILO SOBREIRA DE SANTANA,Fortaleza,148894.441,9187103,56.76,1198116,0.682,28420220.0,24608350.0,942,3148369
6,Distrito Federal,53,brasiliense,IBANEIS ROCHA BARROS JUNIOR,Brasília,5760.783,3055149,444.66,377622,0.824,23812210.0,21990460.0,2686,1812473
7,Espírito Santo,32,capixaba ou espírito-santense,JOSE RENATO CASAGRANDE,Vitória,46074.447,4064052,76.25,502059,0.74,19685620.0,14392340.0,1477,1936862
8,Goiás,52,goiano,RONALDO RAMOS CAIADO,Goiânia,340203.329,7113540,17.65,877593,0.735,37885340.0,24248380.0,1306,3909429
9,Maranhão,21,maranhense,FLÁVIO DINO DE CASTRO E COSTA,São Luís,329642.182,7114598,19.81,1178949,0.639,18503260.0,17627170.0,636,1696683


In [30]:
# List the column labels to get the exact spelling of the state column's name.
df.columns

Index(['FU [-]', 'Code [-]', 'Demonym [-]', 'Governor [2019]',
       'Capital [2010]', 'Area - km² [2019]',
       'Population Estimate - people [2020]',
       'Demographic density - inhab/km² [2010]',
       'Enrollment in primary education - enrollments [2018]',
       'HDI <span>Human Development Index</span> [2010]',
       'Realized revenue - R$ (×1000) [2017]',
       'Committed expenditure - R$ (×1000) [2017]',
       'Monthly household income per capita - R$ [2019]',
       'Total vehicles - vehicles [2018]'],
      dtype='object')

In [31]:
# Distinct values of the state column, i.e. the full names that the
# abbreviation mapping has to cover.
df['FU [-]'].unique()

array(['Acre', 'Alagoas', 'Amapá', 'Amazonas', 'Bahia', 'Ceará',
       'Distrito Federal', 'Espírito Santo', 'Goiás', 'Maranhão',
       'Mato Grosso', 'Mato Grosso do Sul', 'Minas Gerais', 'Pará',
       'Paraíba', 'Paraná', 'Pernambuco', 'Piauí', 'Rio de Janeiro',
       'Rio Grande do Norte', 'Rio Grande do Sul', 'Rondônia', 'Roraima',
       'Santa Catarina', 'São Paulo', 'Sergipe', 'Tocantins'],
      dtype=object)

In [32]:
# Full federative-unit name -> two-letter abbreviation. The keys are spelled
# (accents included) exactly like the values of the `FU [-]` column.
states = {
    'Acre': 'AC',
    'Alagoas': 'AL',
    'Amapá': 'AP',
    'Amazonas': 'AM',
    'Bahia': 'BA',
    'Ceará': 'CE',
    'Distrito Federal': 'DF',
    'Espírito Santo': 'ES',
    'Goiás': 'GO',
    'Maranhão': 'MA',
    'Mato Grosso': 'MT',
    'Mato Grosso do Sul': 'MS',
    'Minas Gerais': 'MG',
    'Pará': 'PA',
    'Paraíba': 'PB',
    'Paraná': 'PR',
    'Pernambuco': 'PE',
    'Piauí': 'PI',
    'Rio de Janeiro': 'RJ',
    'Rio Grande do Norte': 'RN',
    'Rio Grande do Sul': 'RS',
    'Rondônia': 'RO',
    'Roraima': 'RR',
    'Santa Catarina': 'SC',
    'São Paulo': 'SP',
    'Sergipe': 'SE',
    'Tocantins': 'TO',
}

In [35]:
# Swap each full state name for its two-letter abbreviation from `states`.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the selected Series: that chained in-place
# call acts on a temporary object under pandas Copy-on-Write and leaves `df`
# unchanged. Values that are not keys of `states` are kept as they are.
df['FU [-]'] = df['FU [-]'].replace(states)
df.columns

Index(['FU [-]', 'Code [-]', 'Demonym [-]', 'Governor [2019]',
       'Capital [2010]', 'Area - km² [2019]',
       'Population Estimate - people [2020]',
       'Demographic density - inhab/km² [2010]',
       'Enrollment in primary education - enrollments [2018]',
       'HDI <span>Human Development Index</span> [2010]',
       'Realized revenue - R$ (×1000) [2017]',
       'Committed expenditure - R$ (×1000) [2017]',
       'Monthly household income per capita - R$ [2019]',
       'Total vehicles - vehicles [2018]'],
      dtype='object')

In [36]:
# Rename only the state column to `UF`, the name used by the primary data
# source. Renaming by label leaves every other column untouched; overwriting
# all labels from a hand-copied list would mislabel the data if the export's
# column order changed, and fail if its column count changed.
df = df.rename(columns={'FU [-]': 'UF'})
# rename() ignores labels that are absent, so check the result explicitly.
assert 'UF' in df.columns, "state column was not renamed to 'UF'"
# `columns` still ends up holding the final list of column labels.
columns = list(df.columns)

In [37]:
# Display the table again to check the abbreviated values and the `UF` label.
df

Unnamed: 0,UF,Code [-],Demonym [-],Governor [2019],Capital [2010],Area - km² [2019],Population Estimate - people [2020],Demographic density - inhab/km² [2010],Enrollment in primary education - enrollments [2018],HDI <span>Human Development Index</span> [2010],Realized revenue - R$ (×1000) [2017],Committed expenditure - R$ (×1000) [2017],Monthly household income per capita - R$ [2019],Total vehicles - vehicles [2018]
0,AC,12,acriano,GLADSON DE LIMA CAMELI,Rio Branco,164123.964,894470,4.47,157646,0.663,6632883.0,6084417.0,890,277831
1,AL,27,alagoano,JOSE RENAN VASCONCELOS CALHEIROS FILHO,Maceió,27843.295,3351543,112.33,490587,0.631,11950440.0,10460630.0,731,834827
2,AP,16,amapaense,ANTONIO WALDEZ GÓES DA SILVA,Macapá,142470.762,861773,4.69,136185,0.708,5396417.0,4224464.0,880,195039
3,AM,13,amazonense,WILSON MIRANDA LIMA,Manaus,1559167.889,4207714,2.23,705007,0.674,17328460.0,15324900.0,842,883083
4,BA,29,baiano,RUI COSTA DOS SANTOS,Salvador,564760.427,14930634,24.82,2034711,0.66,50191000.0,45570160.0,913,4139107
5,CE,23,cearense,CAMILO SOBREIRA DE SANTANA,Fortaleza,148894.441,9187103,56.76,1198116,0.682,28420220.0,24608350.0,942,3148369
6,DF,53,brasiliense,IBANEIS ROCHA BARROS JUNIOR,Brasília,5760.783,3055149,444.66,377622,0.824,23812210.0,21990460.0,2686,1812473
7,ES,32,capixaba ou espírito-santense,JOSE RENATO CASAGRANDE,Vitória,46074.447,4064052,76.25,502059,0.74,19685620.0,14392340.0,1477,1936862
8,GO,52,goiano,RONALDO RAMOS CAIADO,Goiânia,340203.329,7113540,17.65,877593,0.735,37885340.0,24248380.0,1306,3909429
9,MA,21,maranhense,FLÁVIO DINO DE CASTRO E COSTA,São Luís,329642.182,7114598,19.81,1178949,0.639,18503260.0,17627170.0,636,1696683


In [38]:
# Write the cleaned table to disk. index=False keeps the DataFrame's
# positional row index out of the file; with the default it would be written
# as an extra, unnamed first column.
df.to_csv('ibge_2019_clean.csv', index=False)