## This program extracts municipalities and their codes from Excel and inserts them into the municipalities table in the database

In [22]:
import json
import os

import pandas as pd
from dotenv import dotenv_values
from sqlalchemy import create_engine, MetaData
from sqlalchemy.engine import URL

# Location of the source spreadsheet, resolved relative to the current working directory.
path = os.path.abspath(os.path.join(os.getcwd(), "../indicators_data"))
file = os.path.join(path, 'tx_rend_municipios_2022.xlsx')


metadata = MetaData()

# Database connection settings are read from the .env file in the working directory.
config = dotenv_values("./.env")

# Fail early, with a readable message, when a setting is absent from .env;
# otherwise the literal text "None" would end up in the connection parameters.
required_keys = (
    "DATABASE_USERNAME",
    "DATABASE_PASSWORD",
    "DATABASE_NAME",
    "DATABASE_PORT",
    "DATABASE_HOST",
)
missing_keys = [key for key in required_keys if config.get(key) is None]
if missing_keys:
    raise RuntimeError(f"Missing database settings in ./.env: {', '.join(missing_keys)}")

username = config.get("DATABASE_USERNAME")
password = config.get("DATABASE_PASSWORD")
dbname = config.get("DATABASE_NAME")
port = config.get("DATABASE_PORT")
host = config.get("DATABASE_HOST")

# Build the URL from its components instead of interpolating them into a string:
# URL.create escapes special characters (e.g. "@", "/", ":") in the username and
# password, which a hand-built URL would mis-parse. The values are used verbatim,
# so they must be stored unescaped in .env.
database_url = URL.create(
    "postgresql+psycopg2",
    username=username,
    password=password,
    host=host,
    port=int(port) if port else None,
    database=dbname,
)

# echo=True makes SQLAlchemy log every statement it emits.
engine = create_engine(database_url, echo=True)

### Extracts the `id`, `nome` and `UF` columns and keeps only the municipalities in the state of MG

In [2]:
# Load the three columns of interest, keep the MG rows, collapse repeated rows
# so each (UF, code, name) combination appears once, and give the code/name
# columns their short names.
df = (
    pd.read_excel(
        file,
        skiprows=5,  # the first five rows of the sheet are skipped (presumably a preamble above the header - confirm)
        usecols=['UF', 'Código do Município', 'Nome do Município'],
    )
    .loc[lambda frame: frame['UF'] == 'MG']
    .drop_duplicates()
    .rename(columns={'Código do Município': 'id', 'Nome do Município': 'nome'})
)

display(df)

Unnamed: 0,UF,id,nome
28354,MG,3100104,Abadia dos Dourados
28365,MG,3100203,Abaeté
28378,MG,3100302,Abre Campo
28390,MG,3100401,Acaiaca
28403,MG,3100500,Açucena
...,...,...,...
37797,MG,3171808,Virginópolis
37811,MG,3171907,Virgolândia
37819,MG,3172004,Visconde do Rio Branco
37833,MG,3172103,Volta Grande


### Converte os ids e nomes dos municípios em um dicionário e salva em um arquivo Python

In [24]:
# Make sure the municipality codes are integers before they become dictionary values.
df['id'] = df['id'].astype(int)

# Map municipality name -> municipality code.
municipios_map = dict(zip(df['nome'], df['id']))
municipios_map['Dona Eusébia'] = 3122900  # this municipality's name has two spellings; register this one as well
print(municipios_map)

# The dictionary is written out as Python source: a single assignment to `municipio_map`.
file_name = 'municipio_id.py'
content = f'municipio_map = {json.dumps(municipios_map, indent=4)}'

# The handle is deliberately not named `file`: that global holds the path of the
# Excel spreadsheet, and rebinding it here would break a re-run of the cell that
# reads the spreadsheet.
with open(file_name, 'w', encoding='utf-8') as output_file:
    output_file.write(content)

print(f"\nDicionário salvo em '{file_name}'")


{'Abadia dos Dourados': 3100104, 'Abaeté': 3100203, 'Abre Campo': 3100302, 'Acaiaca': 3100401, 'Açucena': 3100500, 'Água Boa': 3100609, 'Água Comprida': 3100708, 'Aguanil': 3100807, 'Águas Formosas': 3100906, 'Águas Vermelhas': 3101003, 'Aimorés': 3101102, 'Aiuruoca': 3101201, 'Alagoa': 3101300, 'Albertina': 3101409, 'Além Paraíba': 3101508, 'Alfenas': 3101607, 'Alfredo Vasconcelos': 3101631, 'Almenara': 3101706, 'Alpercata': 3101805, 'Alpinópolis': 3101904, 'Alterosa': 3102001, 'Alto Caparaó': 3102050, 'Alto Rio Doce': 3102100, 'Alvarenga': 3102209, 'Alvinópolis': 3102308, 'Alvorada de Minas': 3102407, 'Amparo do Serra': 3102506, 'Andradas': 3102605, 'Cachoeira de Pajeú': 3102704, 'Andrelândia': 3102803, 'Angelândia': 3102852, 'Antônio Carlos': 3102902, 'Antônio Dias': 3103009, 'Antônio Prado de Minas': 3103108, 'Araçaí': 3103207, 'Aracitaba': 3103306, 'Araçuaí': 3103405, 'Araguari': 3103504, 'Arantina': 3103603, 'Araponga': 3103702, 'Araporã': 3103751, 'Arapuá': 3103801, 'Araújos': 3

In [21]:
# Sanity check on the `id` column: print its dtype, then how many values there
# are of each Python type.
print(
    df['id'].dtype,
    df['id'].apply(type).value_counts(),
    sep='\n',
)


int64
id
<class 'int'>    853
Name: count, dtype: int64


### Envia dados para o banco de dados

In [12]:
# Insert the rows into the "Municipio" table through the configured engine.
# if_exists='append' adds to an existing table, so running this cell again
# sends the same rows a second time. index=False leaves the DataFrame index
# out of the inserted columns.
df.to_sql(
    name='Municipio',
    con=engine,
    if_exists='append',
    index=False,
)

2024-07-26 16:22:50,853 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2024-07-26 16:22:50,857 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-07-26 16:22:50,932 INFO sqlalchemy.engine.Engine select current_schema()
2024-07-26 16:22:50,934 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-07-26 16:22:50,999 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2024-07-26 16:22:51,007 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-07-26 16:22:51,076 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-07-26 16:22:51,246 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname

853